diff --git a/xlhtml/ChangeLog b/xlhtml/ChangeLog index 4b844b0..5205d49 100644 --- a/xlhtml/ChangeLog +++ b/xlhtml/ChangeLog @@ -1,16 +1,22 @@ xlHtml ChangeLog HEAD - * Temporary fix for non-Unicode 8-bit characters in utf-8 output until - real charset conversion is in place - * Fixed some typos and formatting - * Fixed closing of tag - * Fixed raw Unicode (A.B.C.D., where . is character 0x00) in output - when string in XLS begins as 8-bit but continues as 16-bit - * Fixed invalid XML output - missing quotes aroud size attribute - of - * Made rowspan and colspan attributes instead of content of - in XML output + - Vaclav Dvorak: + * Temporary fix for non-Unicode 8-bit characters in utf-8 output until + real charset conversion is in place + * Fixed some typos and formatting + * Fixed closing of tag + * Fixed invalid XML output - missing quotes aroud size attribute + of + * Made rowspan and colspan attributes instead of content of + in XML output + * Updated doxygen.conf and in-source documentation and comments + * Fixed SST code when a string on the boundary of a BIFF SST and + CONTINUE starts as Unicode and continues as 8-bit or vice versa; + indication: raw Unicode (A.b.c.d., where . is character 0x00) in + output, or MaxStringsExceeded + * Enabled compilation warnings, made sure there are none + * More code modularization * Added some Alpha portability fixes. 0.5 04/13/02 diff --git a/xlhtml/Makefile.am b/xlhtml/Makefile.am index b68d6cd..0c02a1f 100644 --- a/xlhtml/Makefile.am +++ b/xlhtml/Makefile.am @@ -12,9 +12,9 @@ LDADD = ../cole/libcole.a -lm xlhtml_SOURCES = support.c xlhtml.c html.c ascii.c xml.c xldump_SOURCES = xldump.c xlcdump_SOURCES = xlcdump.c -#AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith +AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith -doc: +doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h @echo Generating documentation... doxygen doxygen.conf @echo done. 
diff --git a/xlhtml/Makefile.in b/xlhtml/Makefile.in index cfdfa7a..ecf9d6e 100644 --- a/xlhtml/Makefile.in +++ b/xlhtml/Makefile.in @@ -109,6 +109,7 @@ xlcdump_LDADD = $(LDADD) xlcdump_DEPENDENCIES = ../cole/libcole.a xlcdump_LDFLAGS = SCRIPTS = $(bin_SCRIPTS) $(noinst_SCRIPTS) +AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) @@ -383,9 +384,7 @@ install-am install uninstall-am uninstall all-redirect all-am all \ installdirs mostlyclean-generic distclean-generic clean-generic \ maintainer-clean-generic clean mostlyclean distclean maintainer-clean -#AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith - -doc: +doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h @echo Generating documentation... doxygen doxygen.conf @echo done. diff --git a/xlhtml/TODO b/xlhtml/TODO index 2249862..0759a32 100644 --- a/xlhtml/TODO +++ b/xlhtml/TODO @@ -1,5 +1,13 @@ -0.4 ? -Fix nasty Unicode bug when very large shared string tables are encountereds -Support String formulas -Reduce memory footprint -Support data types by using their format string +- Update documentation +- Implement real character set conversion (using iconv); there should be a + command-line option to set output charset and another to override input + charset. Default: auto-detection for both. +- Define an XML DTD and stick to it. Look into the OpenOffice spreadsheet + format - we don't need two different spreadsheet XML formats... +- Modularize code. Separate XLS reading into a library, make a frontend with + various output formats: XML, HTML, CSV... 
+- Make main_line_processor()'s working buffer grow dynamically +- Notes from Steve Grubb: + - Support String formulas + - Reduce memory footprint + - Support data types by using their format string diff --git a/xlhtml/ascii.c b/xlhtml/ascii.c index 1657bbc..03681ea 100644 --- a/xlhtml/ascii.c +++ b/xlhtml/ascii.c @@ -1,21 +1,29 @@ +/*! \file ascii.c + \brief ASCII and CSV output for xlhtml +*/ + +/* + Copyright 2002 Charles N Wyble + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + #include "xlhtml.h" +#include "support.h" -extern void do_cr(void); -extern int first_sheet; -extern int last_sheet; -extern void OutputString(uni_string * ); -extern char filename[256]; -extern void output_cell( cell *, int); -extern int Csv; -work_sheet **ws_array; -xf_attr **xf_array; - -extern int IsCellNumeric(cell *); -extern int IsCellSafe(cell *); -extern int IsCellFormula(cell *); -extern void output_formatted_data(uni_string *, U16, int, int); -extern void SetupExtraction(void); void OutputPartialTableAscii(void) diff --git a/xlhtml/doxygen.conf b/xlhtml/doxygen.conf index c622d01..0d75b4a 100644 --- a/xlhtml/doxygen.conf +++ b/xlhtml/doxygen.conf @@ -1,164 +1,732 @@ -# Doxyfile 0.1 +# Doxyfile 1.2.3 + +# This file describes the settings to be used by doxygen for a project +# +# All text after a hash (#) is considered a comment and 
will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # General configuration options #--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + PROJECT_NAME = xlhtml + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + OUTPUT_DIRECTORY = doc + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Dutch, French, Italian, Czech, Swedish, German, Finnish, Japanese, +# Korean, Hungarian, Spanish, Romanian, Russian, Croatian, Polish, +# Portuguese and Slovene. + OUTPUT_LANGUAGE = English + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. 
+ EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + EXTRACT_STATIC = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these class will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + HIDE_UNDOC_CLASSES = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + REPEAT_BRIEF = YES + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + ALWAYS_DETAILED_SEC = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user defined part of the path. 
Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. It is allowed to use relative paths in the argument list. + STRIP_FROM_PATH = + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + INTERNAL_DOCS = NO + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a class diagram (in Html and LaTeX) for classes with base or +# super classes. Setting the tag to NO turns the diagrams off. + CLASS_DIAGRAMS = NO + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. + SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + STRIP_CODE_COMMENTS = YES + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower case letters. If set to YES upper case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# users are advised to set this option to NO. + CASE_SENSE_NAMES = YES -SHORT_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. 
+ HIDE_SCOPE_NAMES = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + VERBATIM_HEADERS = NO -SHOW_INCLUDE_FILES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + JAVADOC_AUTOBRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# reimplements. + INHERIT_DOCS = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + INLINE_INFO = NO + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + SORT_MEMBER_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + DISTRIBUTE_GROUP_DOC = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. 
+ TAB_SIZE = 8 -GENERATE_TODOLIST = YES -GENERATE_TESTLIST = YES -GENERATE_BUGLIST = YES -ALIASES = + +# The ENABLE_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + ENABLED_SECTIONS = -MAX_INITIALIZER_LINES = 30 -OPTIMIZE_OUTPUT_FOR_C = YES -SHOW_USED_FILES = YES + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- -QUIET = NO + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = YES + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. 
+ WARN_IF_UNDOCUMENTED = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. + WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + WARN_LOGFILE = + #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- -INPUT = xlhtml.c + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + EXCLUDE = + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. 
+ EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + EXAMPLE_PATTERNS = + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. + INPUT_FILTER = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse. + FILTER_SOURCE_FILES = NO + #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + IGNORE_PREFIX = + #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + HTML_OUTPUT = html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet + HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. 
+ HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + GENERATE_HTMLHELP = NO -GENERATE_CHI = NO -BINARY_TOC = NO -TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + ENUM_VALUES_PER_LINE = 4 -GENERATE_TREEVIEW = NO -TREEVIEW_WIDTH = 250 + #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + LATEX_OUTPUT = latex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. 
+ EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + LATEX_BATCHMODE = NO + #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimised for Word 97 and may not look very pretty with +# other RTF readers or editors. + GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. 
+ COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using Word or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + RTF_STYLESHEET_FILE = -RTF_EXTENSIONS_FILE = + #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + MAN_EXTENSION = .3 -MAN_LINKS = NO + #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. Warning: This feature +# is still experimental and very incomplete. 
+ GENERATE_XML = NO + #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_PREDEFINED tags. + EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. 
+ PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_PREDEF_ONLY tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + EXPAND_AS_DEFINED = -SKIP_FUNCTION_MACROS = YES + #--------------------------------------------------------------------------- # Configuration::addtions related to external references #--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tagfiles. + TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + ALLEXTERNALS = NO + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + PERL_PATH = /usr/bin/perl + #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. 
+ CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + COLLABORATION_GRAPH = YES -TEMPLATE_RELATIONS = YES + +# If the ENABLE_PREPROCESSING, INCLUDE_GRAPH, and HAVE_DOT tags are set to +# YES then doxygen will generate a graph for each documented file showing +# the direct and indirect include dependencies of the file with other +# documented files. + INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, INCLUDED_BY_GRAPH, and HAVE_DOT tags are set to +# YES then doxygen will generate a graph for each documented header file showing +# the documented files that directly or indirectly include this file + INCLUDED_BY_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + GRAPHICAL_HIERARCHY = YES + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found on the path. + DOT_PATH = -DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. 
+ MAX_DOT_GRAPH_HEIGHT = 1024 + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + GENERATE_LEGEND = YES -DOT_CLEANUP = YES + #--------------------------------------------------------------------------- # Configuration::addtions related to the search engine #--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + SEARCHENGINE = NO + +# The CGI_NAME tag should be the name of the CGI script that +# starts the search engine (doxysearch) with the correct parameters. +# A script with this name will be generated by doxygen. + CGI_NAME = search.cgi + +# The CGI_URL tag should be the absolute URL to the directory where the +# cgi binaries are located. See the documentation of your http daemon for +# details. + CGI_URL = + +# The DOC_URL tag should be the absolute URL to the directory where the +# documentation is located. If left blank the absolute path to the +# documentation, with file:// prepended to it, will be used. + DOC_URL = + +# The DOC_ABSPATH tag should be the absolute path to the directory where the +# documentation is located. If left blank the directory on the local machine +# will be used. + DOC_ABSPATH = + +# The BIN_ABSPATH tag must point to the directory where the doxysearch binary +# is installed. + BIN_ABSPATH = /usr/local/bin/ + +# The EXT_DOC_PATHS tag can be used to specify one or more paths to +# documentation generated for other projects. This allows doxysearch to search +# the documentation for these projects as well. + EXT_DOC_PATHS = diff --git a/xlhtml/html.c b/xlhtml/html.c index 8b8e2d9..24474da 100644 --- a/xlhtml/html.c +++ b/xlhtml/html.c @@ -1,58 +1,37 @@ +/*! 
\file html.c + \brief HTML output for xlhtml +*/ + +/* + Copyright 2002 Charles N Wyble + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + #include "xlhtml.h" +#include "support.h" #include - -extern void do_cr(void); -extern int center_tables; -extern int first_sheet; -extern int last_sheet; -extern uni_string default_font; -extern void trim_sheet_edges(unsigned int); -extern int next_ws_title; -extern void SetupExtraction(void); -extern void update_default_font(unsigned int); -extern void OutputString(uni_string * ); -extern int default_fontsize; -extern char *default_alignment; -extern int aggressive; -extern char *lastUpdated; -extern int file_version; -extern int NoFormat; -extern int notAccurate; -extern int formula_warnings; -extern int NoHeaders; -extern int NotImplemented; -extern int Unsupported; -extern int MaxWorksheetsExceeded; -extern int MaxRowExceeded; -extern int MaxColExceeded; -extern int MaxStringsExceeded; -extern int MaxFontsExceeded; -extern int MaxPalExceeded; -extern int MaxXFExceeded; -extern int MaxFormatsExceeded; -extern char colorTab[MAX_COLORS][8]; -extern char *default_text_color; -extern char *default_background_color; -extern char *default_image; -extern char filename[256]; -extern int UnicodeStrings; -extern int CodePage; -extern char *title; -extern void update_default_alignment(unsigned 
int, int); -extern void output_cell( cell *, int); -extern uni_string author; -extern int null_string(U8 *); -extern unsigned int next_font; -work_sheet **ws_array; -font_attr **font_array; - +/* prototypes for functions in this file */ void output_header(void); void output_footer(void); + void OutputTableHTML(void) { int i, j, k; diff --git a/xlhtml/support.c b/xlhtml/support.c index b95d74a..0035dd9 100644 --- a/xlhtml/support.c +++ b/xlhtml/support.c @@ -1,11 +1,34 @@ +/*! \file support.c + \brief Various support functions for xlhtml. +*/ -/* Various support functions for xlhtml. */ +/* + Copyright 2002 Charles N Wyble + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "support.h" #include #include #include "../cole/cole.h" #include + + void print_version(void) { printf("xlhtml %s \nCopyright (c) 1999-2002, Charles Wyble\n" @@ -182,8 +205,8 @@ void NumToDate(long num, int *year, int *month, int *day) *year = *year % 100; } -/* noaliasdub macro avoids trouble from gcc -O2 type-based alias analysis */ typedef S32 swords[2]; +/*! avoids trouble from gcc -O2 type-based alias analysis */ #define noaliasdub(type,ptr) \ (((union{swords sw; F64 dub;} *)(ptr))->sw) diff --git a/xlhtml/tuneable.h b/xlhtml/tuneable.h index 8d4e5cc..79ae89d 100644 --- a/xlhtml/tuneable.h +++ b/xlhtml/tuneable.h @@ -1,7 +1,40 @@ +/*! 
\file tuneable.h + \brief Values for tuning performance of xlhtml +*/ -#define XFORMATS_INCR 64 /*!< Increments to allocate extended formats */ -#define FONTS_INCR 32 /*!< Increments to allocate fonts */ -#define WORKSHEETS_INCR 4 /*!< Increments to allocate worksheet pages */ -#define COLS_INCR (U16)24 /*!< Increments to allocate Columns per Worksheet page */ -#define ROWS_INCR (U32)128 /*!< Increments to allocate Rows per Worksheet page */ -#define STRINGS_INCR 256UL /*!< Increments to allocate the string array - */ +/* + Copyright 2002 Charles N Wyble + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +/*! \brief Increments to allocate extended formats */ +#define XFORMATS_INCR 64 + +/*! \brief Increments to allocate fonts */ +#define FONTS_INCR 32 + +/*! \brief Increments to allocate worksheet pages */ +#define WORKSHEETS_INCR 4 + +/*! \brief Increments to allocate Columns per Worksheet page */ +#define COLS_INCR (U16)24 + +/*! \brief Increments to allocate Rows per Worksheet page */ +#define ROWS_INCR (U32)128 + +/*! \brief Increments to allocate the string array */ +#define STRINGS_INCR 256UL diff --git a/xlhtml/xlhtml.c b/xlhtml/xlhtml.c index f3c1edc..e93fccc 100644 --- a/xlhtml/xlhtml.c +++ b/xlhtml/xlhtml.c @@ -1,7 +1,7 @@ /*! 
\file xlhtml.c - \brief converts excel files to Html + \brief converts MS Excel files to useful formats - xlhtml generates HTML, XML, csv and tab-delimitted versions of Excel + xlhtml generates HTML, XML, csv and tab-delimited versions of MS Excel spreadsheets. */ @@ -21,11 +21,13 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ + #include "tuneable.h" #include "xlhtml.h" +#include "support.h" + static U16 HARD_MAX_ROWS = HARD_MAX_ROWS_97; @@ -117,19 +119,6 @@ const char month_abbr[12][5] = { "Jan", "Feb", "Mar", "Apr", "May", "June", /* Function Prototypes */ -/* These functions are in support.c */ -extern void print_version(void); -extern void display_usage(void); -extern void do_cr(void); -extern void OutputTableHTML(void); -extern S32 getLong(U8 *); -extern U16 getShort(U8 *); -extern void getDouble(U8 *, F64 *); -extern int null_string(U8 *); -extern void FracToTime(U8 *, int *, int *, int *, int *); -extern void NumToDate(long, int *, int *, int *); -extern void RKtoDouble(S32, F64 *); - /* This function is in xml.c */ extern void OutputTableXML(void); @@ -137,6 +126,7 @@ extern void OutputTableXML(void); void OutputPartialTableAscii(void); /* These functions are in html.c */ +extern void OutputTableHTML(void); extern void output_start_html_attr(html_attr *h, unsigned int, int); extern void output_end_html_attr(html_attr *h); extern void output_footer(void); @@ -144,21 +134,11 @@ extern void output_header(void); COLE_LOCATE_ACTION_FUNC scan_file; void main_line_processor(U16, U16, U32, U16, U8); -void SetupExtraction(void); void decodeBoolErr(U16, U16, char *); -int IsCellNumeric(cell *); -int IsCellSafe(cell *); -int IsCellFormula(cell *); -void output_cell(cell *, int); -void output_formatted_data(uni_string *, U16, int, int); void PrintFloatComma(char *, int, F64); void print_as_fraction(F64, int); 
-void trim_sheet_edges(unsigned int); -void update_default_font(unsigned int); void incr_f_cnt(uni_string *); int get_default_font(void); -void update_default_alignment(unsigned int, int); -void OutputString(uni_string *); void OutputCharCorrected(U8); void update_crun_info(U16 *loc, U16 *fnt_idx, U16 crun_cnt, U8 *fmt_run); void put_utf8(U16); @@ -187,15 +167,7 @@ char filename[256]; int file_version = 0; U32 next_string=0; unsigned int next_font=0, next_ws_title=0, next_xf=0; -U8 working_buffer[WBUFF_SIZE]; -unsigned int bufidx, buflast; /*!< Needed for working buffer */ -U8 grbit=0; /*!< Needed by the SST Opcode FC */ -U16 crun=0, cch=0; /*!< Needed by the SST Opcode FC */ -U32 extrst=0; /*!< Needed by the SST Opcode FC */ -U16 nonascii = 0; /*!< Needed by the SST Opcode FC */ int sheet_count=-2; /*!< Number of worksheets found */ -U16 last_opcode = -1; /*!< Used for the continue command */ -unsigned int cont_grbit=0, cont_str_array=0; uni_string default_font; /*!< Font for table */ int default_fontsize = 3; /*!< Default font size for table */ char *default_alignment = 0; /*!< Alignment for table */ @@ -240,7 +212,7 @@ int OutputXML = 0; /*!< Output as xml */ int DumpPage = 0; /*!< Dump page count & max cols & rows */ int Xtract = 0; /*!< Extract a range on a page. */ int MultiByte = 0; /*!< Output as multibyte */ -int NoHeaders = 0; /*!< Dont output html header */ +int NoHeaders = 0; /*!< Don't output html header */ /* Some Global Flags */ @@ -553,15 +525,10 @@ int main (int argc, char **argv) - - - - - void scan_file(COLEDIRENT *cde, void *_info) { - U32 count = 0; - U16 length=0, target=0, opcode=0, version=0; + U32 count=0; + U16 target=0, opcode=0, version=0; U8 buf[16]; COLEFILE *cf; COLERRNO err; @@ -576,28 +543,27 @@ void scan_file(COLEDIRENT *cde, void *_info) /* Read & process the file... */ while (cole_fread(cf, buf, 1, &err)) { + /* The BIFF record format: + * - byte 0: opcode + * - byte 1: version (?) 
+ * - bytes 2 and 3: 16-bit data length + * - from byte 4: record data + */ if (count > 3) main_line_processor(opcode, version, count-4, target, buf[0]); else if (count == 0) - { /* Init everything */ - length = 0; opcode = (U16)buf[0]; - target = 80; /* ficticious number */ - } else if (count == 1) version = (U16)buf[0]; else if (count == 2) - length = (U16)buf[0]; + target = (U16)buf[0]; else if (count == 3) - { - length |= (U16)(buf[0]<<8); - target = length; - } - - if (count == (U32)(target+3)) + target |= (U16)(buf[0]<<8); + if ((count >= 3) && (count == (U32)target + 3)) count = 0; else count++; + if (MaxColExceeded || MaxRowExceeded || MaxWorksheetsExceeded) break; /* We're outta memory and therefore...done */ @@ -648,10 +614,6 @@ void scan_file(COLEDIRENT *cde, void *_info) - - - - void SetupExtraction(void) { if (Xtract) @@ -725,29 +687,41 @@ void SetupExtraction(void) } -/*!****************************************************************** -* \param count the absolute count in the record -* \param last the size of the record -* \param bufidx the index into the working buffer -* \param buflast the expected length of the working buffer -********************************************************************/ + +/*! + * This function gets called once for every single byte of record data. + * \brief Process the record data. + * \param opcode the opcode of the current BIFF record + * \param version the version (?) of the current BIFF record + * \param count the absolute offset in the record data + * \param last the size of the record data + * \param data the character being processed + */ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data) { - U16 cont_opcode = 0; - - /* If first pass, reset stuff. */ + int cont_opcode = 0; /* is this the CONTINUE opcode? 
*/ + static U16 last_opcode = -1; /* holds last non-CONTINUE opcode */ + static U8 working_buffer[WBUFF_SIZE]; + static unsigned int bufidx = 0; /* the index into the working buffer */ + static unsigned int buflast = 0; /* the expected length of the working buffer */ + /* Needed by the SST Opcode 0xFC: */ + static U8 str_options = 0; + static int buf_16bit = 0; + static int now_16bit = 0; + static int after_str_header = 0; + static U16 num_chars = 0; + static U16 num_fmt_runs = 0; + static U32 fareast_bytes = 0; + static U16 nonascii = 0; + + /* On start of record, reset stuff. */ if (count == 0) { - if (opcode != 0x3C) /* continue command */ -/* { - printf("\n* * * * * * CONTINUE * * * * * * * * *\n\n"); - } - else */ - { /* Normal path... */ + if (opcode != 0x3C) /* not CONTINUE opcode */ + { last_opcode = opcode; bufidx = 0; buflast = 0; - cont_str_array = 0; memset(working_buffer, 0, WBUFF_SIZE); } } @@ -760,8 +734,10 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data) /* Abort processing if too big. Next opcode will reset everything. 
*/ if (bufidx >= WBUFF_SIZE) { - /*printf("OC:%02X C:%04X I:%04X BL:%04X cch:%04X gr:%04X\n", opcode, count, bufidx, buflast, cch, grbit); */ - /*abort(); */ + /* this will be printed many times; leave it this way since it's temporary + * anyway - the buffer must be made dynamic + */ + fprintf(stderr, "Warning: working buffer overflow!\n"); return; } @@ -906,13 +882,13 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data) { /* Remember, bufidx is 1 more than it should be */ if ((bufidx == 8)&&(buflast == 0)) { /* buflast = working_buffer[7]; */ - cch = getShort(&working_buffer[6]); - buflast = cch + 9; + num_chars = getShort(&working_buffer[6]); + buflast = num_chars + 9; } if (bufidx == 9) { if (working_buffer[8] == 1) - buflast = (cch << 1) + 9; + buflast = (num_chars << 1) + 9; } if (buflast) { @@ -933,7 +909,7 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data) if (working_buffer[8] == 1) { UnicodeStrings = 2; - add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(cch << 1), 0, 0); + add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(num_chars << 1), 0, 0); } else add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8], len, 0, 0); @@ -965,160 +941,174 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data) } } break; - /************ - * This function has 2 entry points. 1 is the mainline FC opcode. - * In this event there are several bytes that setup the type of - * strings that will follow. Then there is the continue entry - * point which is immediate - e.g location 0. - *************/ - case 0xFC: /* Packed String Array A.K.A. SST Shared String Table...UNI */ - if ((count > 7)||(cont_opcode == 1)) /* Skip the 1st 8 locations they are bs */ + case 0xFC: /* Packed String Array A.K.A. 
SST - Shared String Table */ + /* Format of the SST: + * - bytes 0-3: total number of strings in the workbook (32-bit int) + * - bytes 4-7: number of strings following this header (32-bit int) + * - from byte 8: strings; count is in the preceding int + * Format of the individual strings: + * - bytes 0-1: LN = number of characters (not bytes!) in the string (16-bit int) + * - byte 2: option flags (see below) + * - from byte 3: + * if rich-text flag set: + * - RT = number of rich-text formatting runs (16-bit int) + * if far-east flag set: + * - FE = far-east data size (32-bit int) + * LN characters, either 8-bit or 16-bit each + * if rich-text flag set: + * - RT-times repeated: + * - first formatted character, zero-based (16-bit int) + * - index to font record (16-bit int) + * if far-east flag set: + * - unknown extended data about phonetic, keyboard etc. (FE bytes) + * Option flags (byte 2 in string) is a bit field: + * - bit 0 (01h): 0 = 8-bit characters, 1 = 16-bit characters + * - bit 2 (04h): 0 = no far-east info, 1 = contains far-east info + * - bit 3 (08h): 0 = no rich-text formatting, 1 = contains rich-text formatting + * + * The CONTINUE opcode has a special case for the SST: when a string + * is split into two records, there is one additional byte at the start + * of the second record, which indicates (like the option flags at the + * start of the string) with its bit 0 whether the continuation has + * 8-bit or 16-bit characters. Thus, the strings can start with 8-bit + * characters and continue with 16-bit characters, or vice versa. + */ + if ((count == 0) && !cont_opcode) + { /* initialize variables */ + bufidx = 0; + after_str_header = 0; + } + if ((count < 8) && !cont_opcode) + /*! \todo For now, ignore the SST header. Ideally, we should + * process at most the number of strings that is stored in + * the header, ignoring the rest. Fortunately, the case when + * this would be necessary doesn't seem to occur. 
+ */ + break; + if ((count == 0) && cont_opcode && after_str_header) { -/* if ((count == 0)&&(data == 0)&&(buflast)) */ - if ((count == 0)&&(cont_opcode == 1)&&(buflast)) - { -/* printf("Adjusting...\n"); */ -/* printf("I:%04X BL:%04X\n", bufidx, buflast); */ - cont_str_array = 1; - cont_grbit = data; - if ((cont_grbit & 0x01) && !(grbit & 0x01)) - { /* previous chunk was not Unicode but this one is */ - int i; - grbit |= 0x01; - for (i = bufidx-1; i >= 0; i--) { - working_buffer[2*i] = working_buffer[i]; - working_buffer[2*i+1] = 0; - } + now_16bit = data & 0x01; + if (now_16bit && !buf_16bit) + { /* previous chunk was not Unicode but this one is */ + int i; + /* we will be doubling the buffer contents + * => check if there is space + */ + if (2*buflast >= WBUFF_SIZE) + { + bufidx = WBUFF_SIZE + 1; + break; + } + if (bufidx >= num_chars) + { /* weird - we got a unicode flag, but we're + * actually already done with the string data + */ + now_16bit = 0; + } + else + { + buf_16bit = 1; + if (bufidx > 0) + for (i = bufidx-1; i >= 0; i--) { + working_buffer[2*i] = working_buffer[i]; + working_buffer[2*i+1] = 0; + } bufidx = 2 * bufidx; - buflast = 2 * buflast; + buflast = 2*num_chars + 4*num_fmt_runs + fareast_bytes; } - return; } + break; + } - working_buffer[bufidx] = data; - bufidx++; + working_buffer[bufidx++] = data; + if (buf_16bit && !now_16bit && after_str_header && (bufidx < (2*num_chars))) + /* ASCII -> unicode */ + working_buffer[bufidx++] = 0; - if((cont_str_array)&&(grbit & 0x01)&& !(cont_grbit & 0x01)) - { /* ASCII -> unicode */ - working_buffer[bufidx] = 0; - bufidx++; + if (! after_str_header) /* Header processor */ + { + if (bufidx == 0x03) /* After 3 locations we have length */ + { /* and option flags... 
*/ + num_chars = getShort(&working_buffer[0]); + str_options = working_buffer[2]; + now_16bit = buf_16bit = str_options & 0x01; } - - if (buflast == 0) /* Header processor */ - { - if (bufidx == 0x03) /* After 3 locations we have length */ - { /* and type of chars... */ - cch = getShort(&working_buffer[0]); - grbit = working_buffer[2]; - - if (grbit < 0x04) /* Normal run */ - { - nonascii = 0; - bufidx = 0; - crun = 0; - extrst = 0; - buflast = cch << (grbit & 0x01); - - /* special case for empty strings */ - if (!cch && !buflast) - add_str_array(0, (U8 *)0, 0, 0, 0); - else - memset(working_buffer, 0, WBUFF_SIZE); - } - } - else if (bufidx == 0x05) - { - if ((grbit & 0x0C) == 0x08) /* Rich string only */ - { - nonascii = 0; - bufidx = 0; - crun = getShort(&working_buffer[3]); - extrst = 0; - buflast = (cch << (grbit & 0x01)) + (crun*4); -/* printf("rtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n", - buflast, cch, grbit, extrst, crun, last); - printf("%02X %02X %02X %02X %02X %02X\n", - working_buffer[0], working_buffer[1], working_buffer[2], - working_buffer[3], working_buffer[4], working_buffer[5]); */ - memset(working_buffer, 0, WBUFF_SIZE); - } - } - else if (bufidx == 0x07) - { - if ((grbit & 0x0C) == 0x04) /* Extended string only */ - { - nonascii = 0; - bufidx = 0; - crun = 0; - extrst = getLong(&working_buffer[3]); - buflast = (cch << (grbit & 0x01)) + extrst; -/* printf("esbuflast:%X cch%X grbit:%X extrst:%X last:%X\n", - buflast, cch, grbit, extrst, last); - printf("%02X %02X %02X %02X %02X %02X\n", - working_buffer[0], working_buffer[1], working_buffer[2], - working_buffer[3], working_buffer[4], working_buffer[5]); */ - memset(working_buffer, 0, WBUFF_SIZE); - } - } - else if (bufidx == 0x09) - { - if ((grbit & 0x0C) == 0x0C) - { - /* Rich String + Extended String **/ - nonascii = 0; - bufidx = 0; - crun = getShort(&working_buffer[3]); - extrst = getLong(&working_buffer[5]); - buflast = (cch << (grbit & 0x01)) + extrst + (crun*4); -/* 
printf("xrtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n", - buflast, cch, grbit, extrst, crun, last); - printf("%02X %02X %02X %02X %02X %02X\n", - working_buffer[0], working_buffer[1], working_buffer[2], - working_buffer[3], working_buffer[4], working_buffer[5]); */ - memset(working_buffer, 0, WBUFF_SIZE); - } - } -/* printf("*%02X ", data); */ + if ((bufidx == 0x03) && ((str_options & 0x0C) == 0)) + { /* Normal run */ + num_fmt_runs = 0; + fareast_bytes = 0; + buflast = num_chars << buf_16bit; + after_str_header = 1; } - else /* payload processor */ - { -/* if (cont_opcode == 1) - printf(" %02X", data); */ - if (data > 127) - nonascii = 1; - if (bufidx == buflast) + else if ((bufidx == 0x05) && ((str_options & 0x0C) == 0x08)) + { /* Rich-text formatted string only */ + num_fmt_runs = getShort(&working_buffer[3]); + fareast_bytes = 0; + buflast = (num_chars << buf_16bit) + (num_fmt_runs*4); + after_str_header = 1; + } + else if ((bufidx == 0x07) && ((str_options & 0x0C) == 0x04)) + { /* Extended (Far-East) string only */ + num_fmt_runs = 0; + fareast_bytes = getLong(&working_buffer[3]); + buflast = (num_chars << buf_16bit) + fareast_bytes; + after_str_header = 1; + } + else if ((bufidx == 0x09) && ((str_options & 0x0C) == 0x0C)) + { /* Rich-text formatted string + Extended (Far-East) string */ + num_fmt_runs = getShort(&working_buffer[3]); + fareast_bytes = getLong(&working_buffer[5]); + buflast = (num_chars << buf_16bit) + fareast_bytes + (num_fmt_runs*4); + after_str_header = 1; + } + else if (bufidx >= 0x09) + { /* Houston, we have a problem. This should never happen. */ + fprintf(stderr, "An error in SST processing occured. 
Please contact the author.\n"); + bufidx = WBUFF_SIZE + 1; /* simulate buffer overflow to skip rest of opcode */ + break; + } + if (after_str_header) + { /* the string data is now starting */ + if (buflast == 0) + { /* special case for empty strings */ + add_str_array(0, (U8 *)0, 0, 0, 0); + after_str_header = 0; + } + else { - U8 uni; - U16 len = (U16)(cch << (grbit & 0x01)); -/* int i; */ - - if (grbit & 01) - { - uni = 2; - UnicodeStrings = 2; - } - else - uni = nonascii; - working_buffer[bufidx] = 0; -/* fprintf(stderr,":buflast-"); */ -/* { int i; */ -/* for (i=0; i UnicodeStrings) /* Try to "upgrade" charset */ - UnicodeStrings = uni; - bufidx = 0; - buflast = 0; - cch = 0; - cont_str_array = 0; memset(working_buffer, 0, WBUFF_SIZE); + nonascii = 0; } + bufidx = 0; + } + } + else /* payload processor */ + { + if (data > 127) + nonascii = 1; + if (bufidx == buflast) + { + U8 uni; + U16 len = (U16)(num_chars << buf_16bit); + + if (buf_16bit) + { + uni = 2; + UnicodeStrings = 2; + } + else + uni = nonascii; + working_buffer[bufidx] = 0; + + if (num_fmt_runs) + add_str_array(uni, working_buffer, len, working_buffer+len, num_fmt_runs); + else + add_str_array(uni, working_buffer, len, 0, 0); + if (uni > UnicodeStrings) /* Try to "upgrade" charset */ + UnicodeStrings = uni; + bufidx = 0; + after_str_header = 0; + memset(working_buffer, 0, WBUFF_SIZE); } } break; @@ -1806,9 +1796,6 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data) - - - /*! returns 1 on error, 0 on success */ int ws_init(int i) { @@ -1841,6 +1828,8 @@ int ws_init(int i) return 0; } + + /*! 
returns 1 on error, 0 on success */ int add_more_worksheet_ptrs(void) { @@ -1877,6 +1866,8 @@ int add_more_worksheet_ptrs(void) return 0; } + + int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols) { cell **tc_array; @@ -1908,6 +1899,8 @@ int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols) return 0; } + + void add_wb_array(U16 r, U16 c, U16 xf, U16 type, U8 uni, U8 *str, U16 len, U16 crun_cnt, U8 *fmt_run) { diff --git a/xlhtml/xlhtml.h b/xlhtml/xlhtml.h index b0ad64a..caa2b00 100644 --- a/xlhtml/xlhtml.h +++ b/xlhtml/xlhtml.h @@ -1,3 +1,30 @@ +/*! \file xlhtml.h + \brief Header file for xlhtml +*/ + +/* + Copyright 2002 Charles N Wyble + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef __XLHTML_H_INCLUDED +#define __XLHTML_H_INCLUDED + + #if !(defined( __BORLANDC__ ) || defined( __WIN32__ )) #include "config.h" /* Created by ./configure script */ @@ -19,9 +46,9 @@ #include /* Used by packed string array Opcode: 0xFC */ -#define HARD_MAX_ROWS_97 0x7FFE /*!< Used in add_wb_array to prevent OOM */ -#define HARD_MAX_ROWS_95 0x3FFF /*!< Used in add_wb_array to prevent OOM */ -#define HARD_MAX_COLS 256 /*!< Used in add_wb_array to prevent OOM */ +#define HARD_MAX_ROWS_97 0x7FFE /*!< Used in add_wb_array to prevent OOM */ +#define HARD_MAX_ROWS_95 0x3FFF /*!< Used in add_wb_array to prevent OOM */ +#define HARD_MAX_COLS 256 /*!< Used in add_wb_array to prevent OOM */ /********************************** * @@ -40,27 +67,30 @@ #define GLOBAL_UMASK (2) #endif -typedef struct /*!< This encapsulates the Unicode String */ +/*! \brief This encapsulates the Unicode String */ +typedef struct { U8 uni; /*!< Unicode String: 0==ASCII/8859-1, 1==windows-1252, 2==utf-8 */ U8 *str; /*!< Characters of string */ U16 len; /*!< Length of string */ U8 *fmt_run; /*!< formatting run, short pairs: offset, index to font */ U8 crun_cnt; /*!< The count of format runs */ -}uni_string; +} uni_string; -typedef struct /*!< This is everything we need for a cell */ +/*! 
\brief This is everything we need for a cell */ +typedef struct { - U16 xfmt; /*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */ - U16 type; /*!< This will record the record type that generated the cell */ + U16 xfmt; /*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */ + U16 type; /*!< This will record the record type that generated the cell */ U16 spanned; /*!< If 1 don't output */ uni_string ustr; /*!< The cell's displayed contents */ U16 rowspan; /*!< rows to span */ U16 colspan; /*!< columns to span */ uni_string h_link; /*!< If a hyperlinked cell, this is the link*/ -}cell; +} cell; -typedef struct /*!< This encapsulates some information about each worksheet */ +/*! \brief This encapsulates some information about each worksheet */ +typedef struct { U32 first_row; S32 biggest_row; @@ -71,9 +101,10 @@ typedef struct /*!< This encapsulates some information about each worksheet */ uni_string ws_title; cell **c_array; U16 spanned; -}work_sheet; +} work_sheet; -typedef struct /*!< This is everything we need to know about fonts */ +/*! \brief This is everything we need to know about fonts */ +typedef struct { U16 size; U16 attr; @@ -82,15 +113,16 @@ typedef struct /*!< This is everything we need to know about fonts */ U16 super; U8 underline; uni_string name; -}font_attr; +} font_attr; typedef struct { uni_string *name; U16 cnt; -}fnt_cnt; +} fnt_cnt; -typedef struct /*!< This covers the Extended Format records */ +/*! \brief This covers the Extended Format records */ +typedef struct { U16 fnt_idx; U16 fmt_idx; @@ -101,9 +133,10 @@ typedef struct /*!< This covers the Extended Format records */ U16 b_l_color; U32 b_t_color; U16 cell_color; -}xf_attr; +} xf_attr; -typedef struct /*!< HTML Attribute */ +/*! 
\brief HTML Attribute */ +typedef struct { int fflag; /*!< Font Flag */ int bflag; /*!< Bold Flag */ @@ -112,6 +145,60 @@ typedef struct /*!< HTML Attribute */ int uflag; /*!< Underline flag */ int sbflag; /*!< Subscript */ int spflag; /*!< Superscript */ -}html_attr; +} html_attr; +extern int first_sheet; +extern int last_sheet; +extern char filename[256]; +extern char *default_text_color; +extern char *default_background_color; +extern char *default_image; +extern int aggressive; +extern int center_tables; +extern int NoHeaders; +extern int formula_warnings; +extern int Csv; +extern xf_attr **xf_array; +extern work_sheet **ws_array; +extern font_attr **font_array; +extern uni_string default_font; +extern unsigned int next_font; +extern unsigned int next_ws_title; +extern int default_fontsize; +extern char *default_alignment; +extern char *title; +extern uni_string author; +extern char *lastUpdated; +extern int file_version; +extern char colorTab[MAX_COLORS][8]; +extern int NoFormat; +extern int notAccurate; +extern int NotImplemented; +extern int Unsupported; +extern int MaxPalExceeded; +extern int MaxXFExceeded; +extern int MaxFormatsExceeded; +extern int MaxColExceeded; +extern int MaxRowExceeded; +extern int MaxWorksheetsExceeded; +extern int MaxStringsExceeded; +extern int MaxFontsExceeded; +extern int UnicodeStrings; +extern int CodePage; + + +void OutputString (uni_string *); +void output_cell (cell *, int); +int IsCellNumeric (cell *); +int IsCellSafe (cell *); +int IsCellFormula (cell *); +void output_formatted_data (uni_string *, U16, int, int); +void SetupExtraction (void); +void trim_sheet_edges (unsigned int); +void update_default_font (unsigned int); +void update_default_alignment (unsigned int, int); + + + +#endif diff --git a/xlhtml/xml.c b/xlhtml/xml.c index 9e908d3..c9a105d 100644 --- a/xlhtml/xml.c +++ b/xlhtml/xml.c @@ -1,38 +1,28 @@ +/*! 
\file xml.c + \brief XML output for xlhtml +*/ + +/* + Copyright 2002 Charles N Wyble + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + #include "xlhtml.h" -extern int first_sheet; -extern int last_sheet; -extern uni_string default_font; -extern void trim_sheet_edges(unsigned int); -extern int next_ws_title; -extern void SetupExtraction(void); -extern void update_default_font(unsigned int); -extern void OutputString(uni_string * ); -extern char *lastUpdated; -extern int file_version; -extern int NoFormat; -extern int notAccurate; -extern int formula_warnings; -extern int NotImplemented; -extern int Unsupported; -extern int MaxWorksheetsExceeded; -extern int MaxRowExceeded; -extern int MaxColExceeded; -extern int MaxStringsExceeded; -extern int MaxFontsExceeded; -extern int MaxPalExceeded; -extern int MaxXFExceeded; -extern int MaxFormatsExceeded; -extern char colorTab[MAX_COLORS]; -extern char filename[256]; -extern int UnicodeStrings; -extern char *title; -extern void update_default_alignment(unsigned int, int); -extern void output_cell( cell *, int); -extern uni_string author; - -work_sheet **ws_array; void OutputTableXML(void) @@ -87,8 +77,8 @@ void OutputTableXML(void) } printf( "\t\t\t%ld\n", (unsigned long)ws_array[i]->first_row ); - printf( "\t\t\t%ld\n", (int) ws_array[i]->biggest_row ); - printf( "\t\t\t%d\n", 
(long) ws_array[i]->first_col ); + printf( "\t\t\t%d\n", (int) ws_array[i]->biggest_row ); + printf( "\t\t\t%ld\n", (long) ws_array[i]->first_col ); printf( "\t\t\t%d\n", (int)ws_array[i]->biggest_col ); printf( "\t\t\t\n" ); @@ -114,7 +104,7 @@ void OutputTableXML(void) printf( "\t\t\t\n" ); printf("\t\t\n"); } - printf( "\t\n" ); + printf( "\t\n" ); /* Print the author's name in itallics... */ if (author.str) @@ -173,5 +163,5 @@ void OutputTableXML(void) /* Output Credit */ printf("\tCreated with xlhtml %s\n", VERSION); printf("\thttp://chicago.sf.net/xlhtml/\n"); - printf( "\n" ); + printf( "\n" ); }