Added some more fixes.

This commit is contained in:
slidedraw 2002-05-15 01:58:35 +00:00
parent 1466e3d614
commit ddf4a5df21
12 changed files with 1085 additions and 391 deletions

View File

@ -1,16 +1,22 @@
xlHtml ChangeLog
HEAD
- Vaclav Dvorak:
* Temporary fix for non-Unicode 8-bit characters in utf-8 output until
real charset conversion is in place
* Fixed some typos and formatting
* Fixed closing of <FONT> tag
* Fixed raw Unicode (A.B.C.D., where . is character 0x00) in output
when string in XLS begins as 8-bit but continues as 16-bit
* Fixed invalid XML output - missing quotes around size attribute
of <FONT>
* Made rowspan and colspan attributes instead of content of <cell>
in XML output
* Updated doxygen.conf and in-source documentation and comments
* Fixed SST code when a string on the boundary of a BIFF SST and
CONTINUE starts as Unicode and continues as 8-bit or vice versa;
indication: raw Unicode (A.b.c.d., where . is character 0x00) in
output, or MaxStringsExceeded
* Enabled compilation warnings, made sure there are none
* More code modularization
* Added some Alpha portability fixes.
0.5 04/13/02

View File

@ -12,9 +12,9 @@ LDADD = ../cole/libcole.a -lm
xlhtml_SOURCES = support.c xlhtml.c html.c ascii.c xml.c
xldump_SOURCES = xldump.c
xlcdump_SOURCES = xlcdump.c
#AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
doc:
doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
@echo Generating documentation...
doxygen doxygen.conf
@echo done.

View File

@ -109,6 +109,7 @@ xlcdump_LDADD = $(LDADD)
xlcdump_DEPENDENCIES = ../cole/libcole.a
xlcdump_LDFLAGS =
SCRIPTS = $(bin_SCRIPTS) $(noinst_SCRIPTS)
AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
CFLAGS = @CFLAGS@
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@ -383,9 +384,7 @@ install-am install uninstall-am uninstall all-redirect all-am all \
installdirs mostlyclean-generic distclean-generic clean-generic \
maintainer-clean-generic clean mostlyclean distclean maintainer-clean
#AM_CFLAGS = -Wall -Wshadow -Wcast-align -Wpointer-arith
doc:
doc: doxygen.conf xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
@echo Generating documentation...
doxygen doxygen.conf
@echo done.

View File

@ -1,5 +1,13 @@
0.4 ?
Fix nasty Unicode bug when very large shared string tables are encountered
Support String formulas
Reduce memory footprint
Support data types by using their format string
- Update documentation
- Implement real character set conversion (using iconv); there should be a
command-line option to set output charset and another to override input
charset. Default: auto-detection for both.
- Define an XML DTD and stick to it. Look into the OpenOffice spreadsheet
format - we don't need two different spreadsheet XML formats...
- Modularize code. Separate XLS reading into a library, make a frontend with
various output formats: XML, HTML, CSV...
- Make main_line_processor()'s working buffer grow dynamically
- Notes from Steve Grubb:
- Support String formulas
- Reduce memory footprint
- Support data types by using their format string

View File

@ -1,21 +1,29 @@
/*! \file ascii.c
\brief ASCII and CSV output for xlhtml
*/
/*
Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "xlhtml.h"
#include "support.h"
extern void do_cr(void);
extern int first_sheet;
extern int last_sheet;
extern void OutputString(uni_string * );
extern char filename[256];
extern void output_cell( cell *, int);
extern int Csv;
work_sheet **ws_array;
xf_attr **xf_array;
extern int IsCellNumeric(cell *);
extern int IsCellSafe(cell *);
extern int IsCellFormula(cell *);
extern void output_formatted_data(uni_string *, U16, int, int);
extern void SetupExtraction(void);
void OutputPartialTableAscii(void)

View File

@ -1,164 +1,732 @@
# Doxyfile 0.1
# Doxyfile 1.2.3
# This file describes the settings to be used by doxygen for a project
#
# All text after a hash (#) is considered a comment and will be ignored
# The format is:
# TAG = value [value, ...]
# For lists items can also be appended using:
# TAG += value [value, ...]
# Values that contain spaces should be placed between quotes (" ")
#---------------------------------------------------------------------------
# General configuration options
#---------------------------------------------------------------------------
# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
# by quotes) that should identify the project.
PROJECT_NAME = xlhtml
# The PROJECT_NUMBER tag can be used to enter a project or revision number.
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER =
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
# If a relative path is entered, it will be relative to the location
# where doxygen was started. If left blank the current directory will be used.
OUTPUT_DIRECTORY = doc
# The OUTPUT_LANGUAGE tag is used to specify the language in which all
# documentation generated by doxygen is written. Doxygen will use this
# information to generate all constant output in the proper language.
# The default language is English, other supported languages are:
# Dutch, French, Italian, Czech, Swedish, German, Finnish, Japanese,
# Korean, Hungarian, Spanish, Romanian, Russian, Croatian, Polish,
# Portuguese and Slovene.
OUTPUT_LANGUAGE = English
# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
# documentation are documented, even if no documentation was available.
# Private class members and static file members will be hidden unless
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
EXTRACT_ALL = YES
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
# will be included in the documentation.
EXTRACT_PRIVATE = NO
# If the EXTRACT_STATIC tag is set to YES all static members of a file
# will be included in the documentation.
EXTRACT_STATIC = YES
# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
# undocumented members of documented classes, files or namespaces.
# If set to NO (the default) these members will be included in the
# various overviews, but no documentation section is generated.
# This option has no effect if EXTRACT_ALL is enabled.
HIDE_UNDOC_MEMBERS = NO
# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
# undocumented classes that are normally visible in the class hierarchy.
# If set to NO (the default) these classes will be included in the various
# overviews. This option has no effect if EXTRACT_ALL is enabled.
HIDE_UNDOC_CLASSES = NO
# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
# include brief member descriptions after the members that are listed in
# the file and class documentation (similar to JavaDoc).
# Set to NO to disable this.
BRIEF_MEMBER_DESC = YES
# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
# the brief description of a member or function before the detailed description.
# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
# brief descriptions will be completely suppressed.
REPEAT_BRIEF = YES
# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
# Doxygen will generate a detailed section even if there is only a brief
# description.
ALWAYS_DETAILED_SEC = NO
# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
# path before files name in the file list and in the header files. If set
# to NO the shortest path that makes the file name unique will be used.
FULL_PATH_NAMES = NO
# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
# can be used to strip a user defined part of the path. Stripping is
# only done if one of the specified strings matches the left-hand part of
# the path. It is allowed to use relative paths in the argument list.
STRIP_FROM_PATH =
# The INTERNAL_DOCS tag determines if documentation
# that is typed after a \internal command is included. If the tag is set
# to NO (the default) then the documentation will be excluded.
# Set it to YES to include the internal documentation.
INTERNAL_DOCS = NO
# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
# generate a class diagram (in Html and LaTeX) for classes with base or
# super classes. Setting the tag to NO turns the diagrams off.
CLASS_DIAGRAMS = NO
# If the SOURCE_BROWSER tag is set to YES then a list of source files will
# be generated. Documented entities will be cross-referenced with these sources.
SOURCE_BROWSER = NO
# Setting the INLINE_SOURCES tag to YES will include the body
# of functions and classes directly in the documentation.
INLINE_SOURCES = NO
# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
# doxygen to hide any special comment blocks from generated source code
# fragments. Normal C and C++ comments will always remain visible.
STRIP_CODE_COMMENTS = YES
# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
# file names in lower case letters. If set to YES upper case letters are also
# allowed. This is useful if you have classes or files whose names only differ
# in case and if your file system supports case sensitive file names. Windows
# users are advised to set this option to NO.
CASE_SENSE_NAMES = YES
SHORT_NAMES = NO
# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
# will show members with their full class and namespace scopes in the
# documentation. If set to YES the scope will be hidden.
HIDE_SCOPE_NAMES = NO
# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
# will generate a verbatim copy of the header file for each class for
# which an include is specified. Set to NO to disable this.
VERBATIM_HEADERS = NO
SHOW_INCLUDE_FILES = NO
# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
# will put a list of the files that are included by a file in the documentation
# of that file.
SHOW_INCLUDE_FILES = YES
# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
# will interpret the first line (until the first dot) of a JavaDoc-style
# comment as the brief description. If set to NO, the JavaDoc
# comments will behave just like the Qt-style comments (thus requiring an
# explicit @brief command for a brief description).
JAVADOC_AUTOBRIEF = NO
# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
# member inherits the documentation from any documented member that it
# reimplements.
INHERIT_DOCS = YES
# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
# is inserted in the documentation for inline members.
INLINE_INFO = NO
# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
# will sort the (detailed) documentation of file and class members
# alphabetically by member name. If set to NO the members will appear in
# declaration order.
SORT_MEMBER_DOCS = YES
# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
# tag is set to YES, then doxygen will reuse the documentation of the first
# member in the group (if any) for the other members of the group. By default
# all members of a group must be documented explicitly.
DISTRIBUTE_GROUP_DOC = NO
# The TAB_SIZE tag can be used to set the number of spaces in a tab.
# Doxygen uses this value to replace tabs by spaces in code fragments.
TAB_SIZE = 8
GENERATE_TODOLIST = YES
GENERATE_TESTLIST = YES
GENERATE_BUGLIST = YES
ALIASES =
# The ENABLED_SECTIONS tag can be used to enable conditional
# documentation sections, marked by \if sectionname ... \endif.
ENABLED_SECTIONS =
MAX_INITIALIZER_LINES = 30
OPTIMIZE_OUTPUT_FOR_C = YES
SHOW_USED_FILES = YES
# The GENERATE_TODOLIST tag can be used to enable (YES) or
# disable (NO) the todo list. This list is created by putting \todo
# commands in the documentation.
GENERATE_TODOLIST = YES
# The GENERATE_TESTLIST tag can be used to enable (YES) or
# disable (NO) the test list. This list is created by putting \test
# commands in the documentation.
GENERATE_TESTLIST = YES
# This tag can be used to specify a number of aliases that act
# as commands in the documentation. An alias has the form "name=value".
# For example adding "sideeffect=\par Side Effects:\n" will allow you to
# put the command \sideeffect (or @sideeffect) in the documentation, which
# will result in a user defined paragraph with heading "Side Effects:".
# You can put \n's in the value part of an alias to insert newlines.
ALIASES =
#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------
QUIET = NO
# The QUIET tag can be used to turn on/off the messages that are generated
# by doxygen. Possible values are YES and NO. If left blank NO is used.
QUIET = YES
# The WARNINGS tag can be used to turn on/off the warning messages that are
# generated by doxygen. Possible values are YES and NO. If left blank
# NO is used.
WARNINGS = YES
# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
# automatically be disabled.
WARN_IF_UNDOCUMENTED = YES
# The WARN_FORMAT tag determines the format of the warning messages that
# doxygen can produce. The string should contain the $file, $line, and $text
# tags, which will be replaced by the file and line number from which the
# warning originated and the warning text.
WARN_FORMAT = "$file:$line: $text"
# The WARN_LOGFILE tag can be used to specify a file to which warning
# and error messages should be written. If left blank the output is written
# to stderr.
WARN_LOGFILE =
#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------
INPUT = xlhtml.c
# The INPUT tag can be used to specify the files and/or directories that contain
# documented source files. You may enter file names like "myfile.cpp" or
# directories like "/usr/src/myproject". Separate the files or directories
# with spaces.
INPUT = xlhtml.c ascii.c xml.c html.c support.c xlhtml.h tuneable.h
# If the value of the INPUT tag contains directories, you can use the
# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
# and *.h) to filter out the source-files in the directories. If left
# blank all files are included.
FILE_PATTERNS =
# The RECURSIVE tag can be used to specify whether or not subdirectories
# should be searched for input files as well. Possible values are YES and NO.
# If left blank NO is used.
RECURSIVE = NO
# The EXCLUDE tag can be used to specify files and/or directories that should
# be excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
EXCLUDE =
# If the value of the INPUT tag contains directories, you can use the
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
# certain files from those directories.
EXCLUDE_PATTERNS =
# The EXAMPLE_PATH tag can be used to specify one or more files or
# directories that contain example code fragments that are included (see
# the \include command).
EXAMPLE_PATH =
# If the value of the EXAMPLE_PATH tag contains directories, you can use the
# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
# and *.h) to filter out the source-files in the directories. If left
# blank all files are included.
EXAMPLE_PATTERNS =
# The IMAGE_PATH tag can be used to specify one or more files or
# directories that contain images that are included in the documentation (see
# the \image command).
IMAGE_PATH =
# The INPUT_FILTER tag can be used to specify a program that doxygen should
# invoke to filter for each input file. Doxygen will invoke the filter program
# by executing (via popen()) the command <filter> <input-file>, where <filter>
# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
# input file. Doxygen will then use the output that the filter program writes
# to standard output.
INPUT_FILTER =
# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
# INPUT_FILTER) will be used to filter the input files when producing source
# files to browse.
FILTER_SOURCE_FILES = NO
#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------
# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
# of all compounds will be generated. Enable this if the project
# contains a lot of classes, structs, unions or interfaces.
ALPHABETICAL_INDEX = NO
# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
# in which this list will be split (can be a number in the range [1..20])
COLS_IN_ALPHA_INDEX = 5
# In case all classes in a project start with a common prefix, all
# classes will be put under the same header in the alphabetical index.
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
# should be ignored while generating the index headers.
IGNORE_PREFIX =
#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------
# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
# generate HTML output.
GENERATE_HTML = YES
# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `html' will be used as the default path.
HTML_OUTPUT = html
# The HTML_HEADER tag can be used to specify a personal HTML header for
# each generated HTML page. If it is left blank doxygen will generate a
# standard header.
HTML_HEADER =
# The HTML_FOOTER tag can be used to specify a personal HTML footer for
# each generated HTML page. If it is left blank doxygen will generate a
# standard footer.
HTML_FOOTER =
# The HTML_STYLESHEET tag can be used to specify a user defined cascading
# style sheet that is used by each HTML page. It can be used to
# fine-tune the look of the HTML output. If the tag is left blank doxygen
# will generate a default style sheet
HTML_STYLESHEET =
# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
# files or namespaces will be aligned in HTML using tables. If set to
# NO a bullet list will be used.
HTML_ALIGN_MEMBERS = YES
# If the GENERATE_HTMLHELP tag is set to YES, additional index files
# will be generated that can be used as input for tools like the
# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
# of the generated HTML documentation.
GENERATE_HTMLHELP = NO
GENERATE_CHI = NO
BINARY_TOC = NO
TOC_EXPAND = NO
# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
# top of each HTML page. The value NO (the default) enables the index and
# the value YES disables it.
DISABLE_INDEX = NO
# This tag can be used to set the number of enum values (range [1..20])
# that doxygen will group on one line in the generated HTML documentation.
ENUM_VALUES_PER_LINE = 4
GENERATE_TREEVIEW = NO
TREEVIEW_WIDTH = 250
#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------
# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
# generate Latex output.
GENERATE_LATEX = NO
# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `latex' will be used as the default path.
LATEX_OUTPUT = latex
# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
# LaTeX documents. This may be useful for small projects and may help to
# save some trees in general.
COMPACT_LATEX = NO
# The PAPER_TYPE tag can be used to set the paper type that is used
# by the printer. Possible values are: a4, a4wide, letter, legal and
# executive. If left blank a4wide will be used.
PAPER_TYPE = a4wide
# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
# packages that should be included in the LaTeX output.
EXTRA_PACKAGES =
# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
# the generated latex document. The header should contain everything until
# the first chapter. If it is left blank doxygen will generate a
# standard header. Notice: only use this tag if you know what you are doing!
LATEX_HEADER =
# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
# is prepared for conversion to pdf (using ps2pdf). The pdf file will
# contain links (just like the HTML output) instead of page references
# This makes the output suitable for online browsing using a pdf viewer.
PDF_HYPERLINKS = NO
# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
# plain latex in the generated Makefile. Set this option to YES to get a
# higher quality PDF documentation.
USE_PDFLATEX = NO
# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
# command to the generated LaTeX files. This will instruct LaTeX to keep
# running if errors occur, instead of asking the user for help.
# This option is also used when generating formulas in HTML.
LATEX_BATCHMODE = NO
#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------
# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
# The RTF output is optimised for Word 97 and may not look very pretty with
# other RTF readers or editors.
GENERATE_RTF = NO
# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `rtf' will be used as the default path.
RTF_OUTPUT = rtf
# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
# RTF documents. This may be useful for small projects and may help to
# save some trees in general.
COMPACT_RTF = NO
# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
# will contain hyperlink fields. The RTF file will
# contain links (just like the HTML output) instead of page references.
# This makes the output suitable for online browsing using Word or other
# programs which support those fields.
# Note: wordpad (write) and others do not support links.
RTF_HYPERLINKS = NO
# Load stylesheet definitions from file. Syntax is similar to doxygen's
# config file, i.e. a series of assignments. You only have to provide
# replacements, missing definitions are set to their default value.
RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------
# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
# generate man pages
GENERATE_MAN = NO
# The MAN_OUTPUT tag is used to specify where the man pages will be put.
# If a relative path is entered the value of OUTPUT_DIRECTORY will be
# put in front of it. If left blank `man' will be used as the default path.
MAN_OUTPUT = man
# The MAN_EXTENSION tag determines the extension that is added to
# the generated man pages (default is the subroutine's section .3)
MAN_EXTENSION = .3
MAN_LINKS = NO
#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------
# If the GENERATE_XML tag is set to YES Doxygen will
# generate an XML file that captures the structure of
# the code including all documentation. Warning: This feature
# is still experimental and very incomplete.
GENERATE_XML = NO
#---------------------------------------------------------------------------
# Configuration options related to the preprocessor
#---------------------------------------------------------------------------
# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
# evaluate all C-preprocessor directives found in the sources and include
# files.
ENABLE_PREPROCESSING = YES
# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
# names in the source code. If set to NO (the default) only conditional
# compilation will be performed. Macro expansion can be done in a controlled
# way by setting EXPAND_ONLY_PREDEF to YES.
MACRO_EXPANSION = NO
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
# then the macro expansion is limited to the macros specified with the
# PREDEFINED and EXPAND_AS_DEFINED tags.
EXPAND_ONLY_PREDEF = NO
# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
# in the INCLUDE_PATH (see below) will be searched if a #include is found.
SEARCH_INCLUDES = YES
# The INCLUDE_PATH tag can be used to specify one or more directories that
# contain include files that are not input files but should be processed by
# the preprocessor.
INCLUDE_PATH =
# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
# patterns (like *.h and *.hpp) to filter out the header-files in the
# directories. If left blank, the patterns specified with FILE_PATTERNS will
# be used.
INCLUDE_FILE_PATTERNS =
# The PREDEFINED tag can be used to specify one or more macro names that
# are defined before the preprocessor is started (similar to the -D option of
# gcc). The argument of the tag is a list of macros of the form: name
# or name=definition (no spaces). If the definition and the = are
# omitted =1 is assumed.
PREDEFINED =
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.
# The macro definition that is found in the sources will be used.
# Use the PREDEFINED tag if you want to use a different macro definition.
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------
# Configuration::additions related to external references
#---------------------------------------------------------------------------
# The TAGFILES tag can be used to specify one or more tagfiles.
TAGFILES =
# When a file name is specified after GENERATE_TAGFILE, doxygen will create
# a tag file that is based on the input files it reads.
GENERATE_TAGFILE =
# If the ALLEXTERNALS tag is set to YES all external classes will be listed
# in the class index. If set to NO only the inherited external classes
# will be listed.
ALLEXTERNALS = NO
# The PERL_PATH should be the absolute path and name of the perl script
# interpreter (i.e. the result of `which perl').
PERL_PATH = /usr/bin/perl
#---------------------------------------------------------------------------
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
# available from the path. This tool is part of Graphviz, a graph visualization
# toolkit from AT&T and Lucent Bell Labs. The other options in this section
# have no effect if this option is set to NO (the default)
HAVE_DOT = NO
# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
# will generate a graph for each documented class showing the direct and
# indirect inheritance relations. Setting this tag to YES will force the
# CLASS_DIAGRAMS tag to NO.
CLASS_GRAPH = YES
# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
# will generate a graph for each documented class showing the direct and
# indirect implementation dependencies (inheritance, containment, and
# class references variables) of the class with other documented classes.
COLLABORATION_GRAPH = YES
TEMPLATE_RELATIONS = YES
# If the ENABLE_PREPROCESSING, INCLUDE_GRAPH, and HAVE_DOT tags are set to
# YES then doxygen will generate a graph for each documented file showing
# the direct and indirect include dependencies of the file with other
# documented files.
INCLUDE_GRAPH = YES
# If the ENABLE_PREPROCESSING, INCLUDED_BY_GRAPH, and HAVE_DOT tags are set to
# YES then doxygen will generate a graph for each documented header file showing
# the documented files that directly or indirectly include this file
INCLUDED_BY_GRAPH = YES
# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
# will generate a graphical hierarchy of all classes instead of a textual one.
GRAPHICAL_HIERARCHY = YES
# The tag DOT_PATH can be used to specify the path where the dot tool can be
# found. If left blank, it is assumed the dot tool can be found on the path.
DOT_PATH =
DOTFILE_DIRS =
# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
# (in pixels) of the graphs generated by dot. If a graph becomes larger than
# this value, doxygen will try to truncate the graph, so that it fits within
# the specified constraint. Beware that most browsers cannot cope with very
# large images.
MAX_DOT_GRAPH_WIDTH = 1024
# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height
# (in pixels) of the graphs generated by dot. If a graph becomes larger than
# this value, doxygen will try to truncate the graph, so that it fits within
# the specified constraint. Beware that most browsers cannot cope with very
# large images.
MAX_DOT_GRAPH_HEIGHT = 1024
# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
# generate a legend page explaining the meaning of the various boxes and
# arrows in the dot generated graphs.
GENERATE_LEGEND = YES
DOT_CLEANUP = YES
#---------------------------------------------------------------------------
# Configuration::additions related to the search engine
#---------------------------------------------------------------------------
# The SEARCHENGINE tag specifies whether or not a search engine should be
# used. If set to NO the values of all tags below this one will be ignored.
SEARCHENGINE = NO
# The CGI_NAME tag should be the name of the CGI script that
# starts the search engine (doxysearch) with the correct parameters.
# A script with this name will be generated by doxygen.
CGI_NAME = search.cgi
# The CGI_URL tag should be the absolute URL to the directory where the
# cgi binaries are located. See the documentation of your http daemon for
# details.
CGI_URL =
# The DOC_URL tag should be the absolute URL to the directory where the
# documentation is located. If left blank the absolute path to the
# documentation, with file:// prepended to it, will be used.
DOC_URL =
# The DOC_ABSPATH tag should be the absolute path to the directory where the
# documentation is located. If left blank the directory on the local machine
# will be used.
DOC_ABSPATH =
# The BIN_ABSPATH tag must point to the directory where the doxysearch binary
# is installed.
BIN_ABSPATH = /usr/local/bin/
# The EXT_DOC_PATHS tag can be used to specify one or more paths to
# documentation generated for other projects. This allows doxysearch to search
# the documentation for these projects as well.
EXT_DOC_PATHS =

View File

@ -1,58 +1,37 @@
/*! \file html.c
\brief HTML output for xlhtml
*/
/*
Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "xlhtml.h"
#include "support.h"
#include <stdio.h>
extern void do_cr(void);
extern int center_tables;
extern int first_sheet;
extern int last_sheet;
extern uni_string default_font;
extern void trim_sheet_edges(unsigned int);
extern int next_ws_title;
extern void SetupExtraction(void);
extern void update_default_font(unsigned int);
extern void OutputString(uni_string * );
extern int default_fontsize;
extern char *default_alignment;
extern int aggressive;
extern char *lastUpdated;
extern int file_version;
extern int NoFormat;
extern int notAccurate;
extern int formula_warnings;
extern int NoHeaders;
extern int NotImplemented;
extern int Unsupported;
extern int MaxWorksheetsExceeded;
extern int MaxRowExceeded;
extern int MaxColExceeded;
extern int MaxStringsExceeded;
extern int MaxFontsExceeded;
extern int MaxPalExceeded;
extern int MaxXFExceeded;
extern int MaxFormatsExceeded;
extern char colorTab[MAX_COLORS][8];
extern char *default_text_color;
extern char *default_background_color;
extern char *default_image;
extern char filename[256];
extern int UnicodeStrings;
extern int CodePage;
extern char *title;
extern void update_default_alignment(unsigned int, int);
extern void output_cell( cell *, int);
extern uni_string author;
extern int null_string(U8 *);
extern unsigned int next_font;
work_sheet **ws_array;
font_attr **font_array;
/* prototypes for functions in this file */
void output_header(void);
void output_footer(void);
void OutputTableHTML(void)
{
int i, j, k;

View File

@ -1,11 +1,34 @@
/*! \file support.c
\brief Various support functions for xlhtml.
*/
/* Various support functions for xlhtml. */
/*
Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "support.h"
#include <stdio.h>
#include <time.h>
#include "../cole/cole.h"
#include <stdlib.h>
void print_version(void)
{
printf("xlhtml %s \nCopyright (c) 1999-2002, Charles Wyble\n"
@ -182,8 +205,8 @@ void NumToDate(long num, int *year, int *month, int *day)
*year = *year % 100;
}
/* noaliasdub macro avoids trouble from gcc -O2 type-based alias analysis */
typedef S32 swords[2];
/*! avoids trouble from gcc -O2 type-based alias analysis */
#define noaliasdub(type,ptr) \
(((union{swords sw; F64 dub;} *)(ptr))->sw)

View File

@ -1,7 +1,40 @@
/*! \file tuneable.h
\brief Values for tuning performance of xlhtml
*/
#define XFORMATS_INCR 64 /*!< Increments to allocate extended formats */
#define FONTS_INCR 32 /*!< Increments to allocate fonts */
#define WORKSHEETS_INCR 4 /*!< Increments to allocate worksheet pages */
#define COLS_INCR (U16)24 /*!< Increments to allocate Columns per Worksheet page */
#define ROWS_INCR (U32)128 /*!< Increments to allocate Rows per Worksheet page */
#define STRINGS_INCR 256UL /*!< Increments to allocate the string array - */
/*
Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*! \brief Increments to allocate extended formats */
#define XFORMATS_INCR 64
/*! \brief Increments to allocate fonts */
#define FONTS_INCR 32
/*! \brief Increments to allocate worksheet pages */
#define WORKSHEETS_INCR 4
/*! \brief Increments to allocate Columns per Worksheet page */
#define COLS_INCR (U16)24
/*! \brief Increments to allocate Rows per Worksheet page */
#define ROWS_INCR (U32)128
/*! \brief Increments to allocate the string array */
#define STRINGS_INCR 256UL

View File

@ -1,7 +1,7 @@
/*! \file xlhtml.c
\brief converts excel files to Html
\brief converts MS Excel files to useful formats
xlhtml generates HTML, XML, csv and tab-delimitted versions of Excel
xlhtml generates HTML, XML, csv and tab-delimited versions of MS Excel
spreadsheets.
*/
@ -21,11 +21,13 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "tuneable.h"
#include "xlhtml.h"
#include "support.h"
static U16 HARD_MAX_ROWS = HARD_MAX_ROWS_97;
@ -117,19 +119,6 @@ const char month_abbr[12][5] = { "Jan", "Feb", "Mar", "Apr", "May", "June",
/* Function Prototypes */
/* These functions are in support.c */
extern void print_version(void);
extern void display_usage(void);
extern void do_cr(void);
extern void OutputTableHTML(void);
extern S32 getLong(U8 *);
extern U16 getShort(U8 *);
extern void getDouble(U8 *, F64 *);
extern int null_string(U8 *);
extern void FracToTime(U8 *, int *, int *, int *, int *);
extern void NumToDate(long, int *, int *, int *);
extern void RKtoDouble(S32, F64 *);
/* This function is in xml.c */
extern void OutputTableXML(void);
@ -137,6 +126,7 @@ extern void OutputTableXML(void);
void OutputPartialTableAscii(void);
/* These functions are in html.c */
extern void OutputTableHTML(void);
extern void output_start_html_attr(html_attr *h, unsigned int, int);
extern void output_end_html_attr(html_attr *h);
extern void output_footer(void);
@ -144,21 +134,11 @@ extern void output_header(void);
COLE_LOCATE_ACTION_FUNC scan_file;
void main_line_processor(U16, U16, U32, U16, U8);
void SetupExtraction(void);
void decodeBoolErr(U16, U16, char *);
int IsCellNumeric(cell *);
int IsCellSafe(cell *);
int IsCellFormula(cell *);
void output_cell(cell *, int);
void output_formatted_data(uni_string *, U16, int, int);
void PrintFloatComma(char *, int, F64);
void print_as_fraction(F64, int);
void trim_sheet_edges(unsigned int);
void update_default_font(unsigned int);
void incr_f_cnt(uni_string *);
int get_default_font(void);
void update_default_alignment(unsigned int, int);
void OutputString(uni_string *);
void OutputCharCorrected(U8);
void update_crun_info(U16 *loc, U16 *fnt_idx, U16 crun_cnt, U8 *fmt_run);
void put_utf8(U16);
@ -187,15 +167,7 @@ char filename[256];
int file_version = 0;
U32 next_string=0;
unsigned int next_font=0, next_ws_title=0, next_xf=0;
U8 working_buffer[WBUFF_SIZE];
unsigned int bufidx, buflast; /*!< Needed for working buffer */
U8 grbit=0; /*!< Needed by the SST Opcode FC */
U16 crun=0, cch=0; /*!< Needed by the SST Opcode FC */
U32 extrst=0; /*!< Needed by the SST Opcode FC */
U16 nonascii = 0; /*!< Needed by the SST Opcode FC */
int sheet_count=-2; /*!< Number of worksheets found */
U16 last_opcode = -1; /*!< Used for the continue command */
unsigned int cont_grbit=0, cont_str_array=0;
uni_string default_font; /*!< Font for table */
int default_fontsize = 3; /*!< Default font size for table */
char *default_alignment = 0; /*!< Alignment for table */
@ -240,7 +212,7 @@ int OutputXML = 0; /*!< Output as xml */
int DumpPage = 0; /*!< Dump page count & max cols & rows */
int Xtract = 0; /*!< Extract a range on a page. */
int MultiByte = 0; /*!< Output as multibyte */
int NoHeaders = 0; /*!< Dont output html header */
int NoHeaders = 0; /*!< Don't output html header */
/* Some Global Flags */
@ -553,15 +525,10 @@ int main (int argc, char **argv)
void scan_file(COLEDIRENT *cde, void *_info)
{
U32 count=0;
U16 length=0, target=0, opcode=0, version=0;
U16 target=0, opcode=0, version=0;
U8 buf[16];
COLEFILE *cf;
COLERRNO err;
@ -576,28 +543,27 @@ void scan_file(COLEDIRENT *cde, void *_info)
/* Read & process the file... */
while (cole_fread(cf, buf, 1, &err))
{
/* The BIFF record format:
* - byte 0: opcode
* - byte 1: version (?)
* - bytes 2 and 3: 16-bit data length
* - from byte 4: record data
*/
if (count > 3)
main_line_processor(opcode, version, count-4, target, buf[0]);
else if (count == 0)
{ /* Init everything */
length = 0;
opcode = (U16)buf[0];
target = 80; /* ficticious number */
}
else if (count == 1)
version = (U16)buf[0];
else if (count == 2)
length = (U16)buf[0];
target = (U16)buf[0];
else if (count == 3)
{
length |= (U16)(buf[0]<<8);
target = length;
}
if (count == (U32)(target+3))
target |= (U16)(buf[0]<<8);
if ((count >= 3) && (count == (U32)target + 3))
count = 0;
else
count++;
if (MaxColExceeded || MaxRowExceeded || MaxWorksheetsExceeded)
break; /* We're outta memory and therefore...done */
@ -648,10 +614,6 @@ void scan_file(COLEDIRENT *cde, void *_info)
void SetupExtraction(void)
{
if (Xtract)
@ -725,29 +687,41 @@ void SetupExtraction(void)
}
/*!******************************************************************
* \param count the absolute count in the record
* \param last the size of the record
* \param bufidx the index into the working buffer
* \param buflast the expected length of the working buffer
********************************************************************/
/*!
* This function gets called once for every single byte of record data.
* \brief Process the record data.
* \param opcode the opcode of the current BIFF record
* \param version the version (?) of the current BIFF record
* \param count the absolute offset in the record data
* \param last the size of the record data
* \param data the character being processed
*/
void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
{
U16 cont_opcode = 0;
int cont_opcode = 0; /* is this the CONTINUE opcode? */
static U16 last_opcode = -1; /* holds last non-CONTINUE opcode */
static U8 working_buffer[WBUFF_SIZE];
static unsigned int bufidx = 0; /* the index into the working buffer */
static unsigned int buflast = 0; /* the expected length of the working buffer */
/* Needed by the SST Opcode 0xFC: */
static U8 str_options = 0;
static int buf_16bit = 0;
static int now_16bit = 0;
static int after_str_header = 0;
static U16 num_chars = 0;
static U16 num_fmt_runs = 0;
static U32 fareast_bytes = 0;
static U16 nonascii = 0;
/* If first pass, reset stuff. */
/* On start of record, reset stuff. */
if (count == 0)
{
if (opcode != 0x3C) /* continue command */
/* {
printf("\n* * * * * * CONTINUE * * * * * * * * *\n\n");
}
else */
{ /* Normal path... */
if (opcode != 0x3C) /* not CONTINUE opcode */
{
last_opcode = opcode;
bufidx = 0;
buflast = 0;
cont_str_array = 0;
memset(working_buffer, 0, WBUFF_SIZE);
}
}
@ -760,8 +734,10 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
/* Abort processing if too big. Next opcode will reset everything. */
if (bufidx >= WBUFF_SIZE)
{
/*printf("OC:%02X C:%04X I:%04X BL:%04X cch:%04X gr:%04X\n", opcode, count, bufidx, buflast, cch, grbit); */
/*abort(); */
/* this will be printed many times; leave it this way since it's temporary
* anyway - the buffer must be made dynamic
*/
fprintf(stderr, "Warning: working buffer overflow!\n");
return;
}
@ -906,13 +882,13 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
{ /* Remember, bufidx is 1 more than it should be */
if ((bufidx == 8)&&(buflast == 0))
{ /* buflast = working_buffer[7]; */
cch = getShort(&working_buffer[6]);
buflast = cch + 9;
num_chars = getShort(&working_buffer[6]);
buflast = num_chars + 9;
}
if (bufidx == 9)
{
if (working_buffer[8] == 1)
buflast = (cch << 1) + 9;
buflast = (num_chars << 1) + 9;
}
if (buflast)
{
@ -933,7 +909,7 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
if (working_buffer[8] == 1)
{
UnicodeStrings = 2;
add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(cch << 1), 0, 0);
add_wb_array(r, c, f, opcode, (U16)2, &working_buffer[9], (U16)(num_chars << 1), 0, 0);
}
else
add_wb_array(r, c, f, opcode, (U16)0, &working_buffer[8], len, 0, 0);
@ -965,134 +941,157 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
}
}
break;
/************
* This function has 2 entry points. 1 is the mainline FC opcode.
* In this event there are several bytes that setup the type of
* strings that will follow. Then there is the continue entry
* point which is immediate - e.g location 0.
*************/
case 0xFC: /* Packed String Array A.K.A. SST Shared String Table...UNI */
if ((count > 7)||(cont_opcode == 1)) /* Skip the 1st 8 locations they are bs */
case 0xFC: /* Packed String Array A.K.A. SST - Shared String Table */
/* Format of the SST:
* - bytes 0-3: total number of strings in the workbook (32-bit int)
* - bytes 4-7: number of strings following this header (32-bit int)
* - from byte 8: strings; count is in the preceding int
* Format of the individual strings:
* - bytes 0-1: LN = number of characters (not bytes!) in the string (16-bit int)
* - byte 2: option flags (see below)
* - from byte 3:
* if rich-text flag set:
* - RT = number of rich-text formatting runs (16-bit int)
* if far-east flag set:
* - FE = far-east data size (32-bit int)
* LN characters, either 8-bit or 16-bit each
* if rich-text flag set:
* - RT-times repeated:
* - first formatted character, zero-based (16-bit int)
* - index to font record (16-bit int)
* if far-east flag set:
* - unknown extended data about phonetic, keyboard etc. (FE bytes)
* Option flags (byte 2 in string) is a bit field:
* - bit 0 (01h): 0 = 8-bit characters, 1 = 16-bit characters
* - bit 2 (04h): 0 = no far-east info, 1 = contains far-east info
* - bit 3 (08h): 0 = no rich-text formatting, 1 = contains rich-text formatting
*
* The CONTINUE opcode has a special case for the SST: when a string
* is split into two records, there is one additional byte at the start
* of the second record, which indicates (like the option flags at the
* start of the string) with its bit 0 whether the continuation has
* 8-bit or 16-bit characters. Thus, the strings can start with 8-bit
* characters and continue with 16-bit characters, or vice versa.
*/
if ((count == 0) && !cont_opcode)
{ /* initialize variables */
bufidx = 0;
after_str_header = 0;
}
if ((count < 8) && !cont_opcode)
/*! \todo For now, ignore the SST header. Ideally, we should
* process at most the number of strings that is stored in
* the header, ignoring the rest. Fortunately, the case when
* this would be necessary doesn't seem to occur.
*/
break;
if ((count == 0) && cont_opcode && after_str_header)
{
/* if ((count == 0)&&(data == 0)&&(buflast)) */
if ((count == 0)&&(cont_opcode == 1)&&(buflast))
{
/* printf("Adjusting...\n"); */
/* printf("I:%04X BL:%04X\n", bufidx, buflast); */
cont_str_array = 1;
cont_grbit = data;
if ((cont_grbit & 0x01) && !(grbit & 0x01))
now_16bit = data & 0x01;
if (now_16bit && !buf_16bit)
{ /* previous chunk was not Unicode but this one is */
int i;
grbit |= 0x01;
/* we will be doubling the buffer contents
* => check if there is space
*/
if (2*buflast >= WBUFF_SIZE)
{
bufidx = WBUFF_SIZE + 1;
break;
}
if (bufidx >= num_chars)
{ /* weird - we got a unicode flag, but we're
* actually already done with the string data
*/
now_16bit = 0;
}
else
{
buf_16bit = 1;
if (bufidx > 0)
for (i = bufidx-1; i >= 0; i--) {
working_buffer[2*i] = working_buffer[i];
working_buffer[2*i+1] = 0;
}
bufidx = 2 * bufidx;
buflast = 2 * buflast;
buflast = 2*num_chars + 4*num_fmt_runs + fareast_bytes;
}
return;
}
break;
}
working_buffer[bufidx] = data;
bufidx++;
working_buffer[bufidx++] = data;
if (buf_16bit && !now_16bit && after_str_header && (bufidx < (2*num_chars)))
/* ASCII -> unicode */
working_buffer[bufidx++] = 0;
if((cont_str_array)&&(grbit & 0x01)&& !(cont_grbit & 0x01))
{ /* ASCII -> unicode */
working_buffer[bufidx] = 0;
bufidx++;
}
if (buflast == 0) /* Header processor */
if (! after_str_header) /* Header processor */
{
if (bufidx == 0x03) /* After 3 locations we have length */
{ /* and type of chars... */
cch = getShort(&working_buffer[0]);
grbit = working_buffer[2];
if (grbit < 0x04) /* Normal run */
{
nonascii = 0;
bufidx = 0;
crun = 0;
extrst = 0;
buflast = cch << (grbit & 0x01);
/* special case for empty strings */
if (!cch && !buflast)
{ /* and option flags... */
num_chars = getShort(&working_buffer[0]);
str_options = working_buffer[2];
now_16bit = buf_16bit = str_options & 0x01;
}
if ((bufidx == 0x03) && ((str_options & 0x0C) == 0))
{ /* Normal run */
num_fmt_runs = 0;
fareast_bytes = 0;
buflast = num_chars << buf_16bit;
after_str_header = 1;
}
else if ((bufidx == 0x05) && ((str_options & 0x0C) == 0x08))
{ /* Rich-text formatted string only */
num_fmt_runs = getShort(&working_buffer[3]);
fareast_bytes = 0;
buflast = (num_chars << buf_16bit) + (num_fmt_runs*4);
after_str_header = 1;
}
else if ((bufidx == 0x07) && ((str_options & 0x0C) == 0x04))
{ /* Extended (Far-East) string only */
num_fmt_runs = 0;
fareast_bytes = getLong(&working_buffer[3]);
buflast = (num_chars << buf_16bit) + fareast_bytes;
after_str_header = 1;
}
else if ((bufidx == 0x09) && ((str_options & 0x0C) == 0x0C))
{ /* Rich-text formatted string + Extended (Far-East) string */
num_fmt_runs = getShort(&working_buffer[3]);
fareast_bytes = getLong(&working_buffer[5]);
buflast = (num_chars << buf_16bit) + fareast_bytes + (num_fmt_runs*4);
after_str_header = 1;
}
else if (bufidx >= 0x09)
{ /* Houston, we have a problem. This should never happen. */
fprintf(stderr, "An error in SST processing occured. Please contact the author.\n");
bufidx = WBUFF_SIZE + 1; /* simulate buffer overflow to skip rest of opcode */
break;
}
if (after_str_header)
{ /* the string data is now starting */
if (buflast == 0)
{ /* special case for empty strings */
add_str_array(0, (U8 *)0, 0, 0, 0);
after_str_header = 0;
}
else
{
memset(working_buffer, 0, WBUFF_SIZE);
}
}
else if (bufidx == 0x05)
{
if ((grbit & 0x0C) == 0x08) /* Rich string only */
{
nonascii = 0;
}
bufidx = 0;
crun = getShort(&working_buffer[3]);
extrst = 0;
buflast = (cch << (grbit & 0x01)) + (crun*4);
/* printf("rtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",
buflast, cch, grbit, extrst, crun, last);
printf("%02X %02X %02X %02X %02X %02X\n",
working_buffer[0], working_buffer[1], working_buffer[2],
working_buffer[3], working_buffer[4], working_buffer[5]); */
memset(working_buffer, 0, WBUFF_SIZE);
}
}
else if (bufidx == 0x07)
{
if ((grbit & 0x0C) == 0x04) /* Extended string only */
{
nonascii = 0;
bufidx = 0;
crun = 0;
extrst = getLong(&working_buffer[3]);
buflast = (cch << (grbit & 0x01)) + extrst;
/* printf("esbuflast:%X cch%X grbit:%X extrst:%X last:%X\n",
buflast, cch, grbit, extrst, last);
printf("%02X %02X %02X %02X %02X %02X\n",
working_buffer[0], working_buffer[1], working_buffer[2],
working_buffer[3], working_buffer[4], working_buffer[5]); */
memset(working_buffer, 0, WBUFF_SIZE);
}
}
else if (bufidx == 0x09)
{
if ((grbit & 0x0C) == 0x0C)
{
/* Rich String + Extended String **/
nonascii = 0;
bufidx = 0;
crun = getShort(&working_buffer[3]);
extrst = getLong(&working_buffer[5]);
buflast = (cch << (grbit & 0x01)) + extrst + (crun*4);
/* printf("xrtbuflast:%X cch%X grbit:%X extrst:%X crun:%X last:%X\n",
buflast, cch, grbit, extrst, crun, last);
printf("%02X %02X %02X %02X %02X %02X\n",
working_buffer[0], working_buffer[1], working_buffer[2],
working_buffer[3], working_buffer[4], working_buffer[5]); */
memset(working_buffer, 0, WBUFF_SIZE);
}
}
/* printf("*%02X ", data); */
}
else /* payload processor */
{
/* if (cont_opcode == 1)
printf(" %02X", data); */
if (data > 127)
nonascii = 1;
if (bufidx == buflast)
{
U8 uni;
U16 len = (U16)(cch << (grbit & 0x01));
/* int i; */
U16 len = (U16)(num_chars << buf_16bit);
if (grbit & 01)
if (buf_16bit)
{
uni = 2;
UnicodeStrings = 2;
@ -1100,27 +1099,18 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
else
uni = nonascii;
working_buffer[bufidx] = 0;
/* fprintf(stderr,":buflast-"); */
/* { int i; */
/* for (i=0; i<buflast; i++) */
/* putchar(working_buffer[i]); */
/* fprintf(stderr,"\nNext String:%d\n", next_string); */
/* } */
if (crun)
add_str_array(uni, working_buffer, len, working_buffer+len, crun);
if (num_fmt_runs)
add_str_array(uni, working_buffer, len, working_buffer+len, num_fmt_runs);
else
add_str_array(uni, working_buffer, len, 0, 0);
if (uni > UnicodeStrings) /* Try to "upgrade" charset */
UnicodeStrings = uni;
bufidx = 0;
buflast = 0;
cch = 0;
cont_str_array = 0;
after_str_header = 0;
memset(working_buffer, 0, WBUFF_SIZE);
}
}
}
break;
case 0xFD: /* String Array Index A.K.A. LABELSST */
working_buffer[count] = data;
@ -1806,9 +1796,6 @@ void main_line_processor(U16 opcode, U16 version, U32 count, U16 last, U8 data)
/*! returns 1 on error, 0 on success */
int ws_init(int i)
{
@ -1841,6 +1828,8 @@ int ws_init(int i)
return 0;
}
/*! returns 1 on error, 0 on success */
int add_more_worksheet_ptrs(void)
{
@ -1877,6 +1866,8 @@ int add_more_worksheet_ptrs(void)
return 0;
}
int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols)
{
cell **tc_array;
@ -1908,6 +1899,8 @@ int resize_c_array(work_sheet *ws, U32 new_rows, U16 new_cols)
return 0;
}
void add_wb_array(U16 r, U16 c, U16 xf, U16 type, U8 uni,
U8 *str, U16 len, U16 crun_cnt, U8 *fmt_run)
{

View File

@ -1,3 +1,30 @@
/*! \file xlhtml.h
\brief Header file for xlhtml
*/
/*
Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __XLHTML_H_INCLUDED
#define __XLHTML_H_INCLUDED
#if !(defined( __BORLANDC__ ) || defined( __WIN32__ ))
#include "config.h" /* Created by ./configure script */
@ -40,7 +67,8 @@
#define GLOBAL_UMASK (2)
#endif
typedef struct /*!< This encapsulates the Unicode String */
/*! \brief This encapsulates the Unicode String */
typedef struct
{
U8 uni; /*!< Unicode String: 0==ASCII/8859-1, 1==windows-1252, 2==utf-8 */
U8 *str; /*!< Characters of string */
@ -49,7 +77,8 @@ typedef struct /*!< This encapsulates the Unicode String */
U8 crun_cnt; /*!< The count of format runs */
} uni_string;
typedef struct /*!< This is everything we need for a cell */
/*! \brief This is everything we need for a cell */
typedef struct
{
U16 xfmt; /*!< The high bit will tell us which version 0 =< 2; 1 == 2+ */
U16 type; /*!< This will record the record type that generated the cell */
@ -60,7 +89,8 @@ typedef struct /*!< This is everything we need for a cell */
uni_string h_link; /*!< If a hyperlinked cell, this is the link*/
} cell;
typedef struct /*!< This encapsulates some information about each worksheet */
/*! \brief This encapsulates some information about each worksheet */
typedef struct
{
U32 first_row;
S32 biggest_row;
@ -73,7 +103,8 @@ typedef struct /*!< This encapsulates some information about each worksheet */
U16 spanned;
} work_sheet;
typedef struct /*!< This is everything we need to know about fonts */
/*! \brief This is everything we need to know about fonts */
typedef struct
{
U16 size;
U16 attr;
@ -90,7 +121,8 @@ typedef struct
U16 cnt;
} fnt_cnt;
typedef struct /*!< This covers the Extended Format records */
/*! \brief This covers the Extended Format records */
typedef struct
{
U16 fnt_idx;
U16 fmt_idx;
@ -103,7 +135,8 @@ typedef struct /*!< This covers the Extended Format records */
U16 cell_color;
} xf_attr;
typedef struct /*!< HTML Attribute */
/*! \brief HTML Attribute */
typedef struct
{
int fflag; /*!< Font Flag */
int bflag; /*!< Bold Flag */
@ -115,3 +148,57 @@ typedef struct /*!< HTML Attribute */
} html_attr;
extern int first_sheet;
extern int last_sheet;
extern char filename[256];
extern char *default_text_color;
extern char *default_background_color;
extern char *default_image;
extern int aggressive;
extern int center_tables;
extern int NoHeaders;
extern int formula_warnings;
extern int Csv;
extern xf_attr **xf_array;
extern work_sheet **ws_array;
extern font_attr **font_array;
extern uni_string default_font;
extern unsigned int next_font;
extern unsigned int next_ws_title;
extern int default_fontsize;
extern char *default_alignment;
extern char *title;
extern uni_string author;
extern char *lastUpdated;
extern int file_version;
extern char colorTab[MAX_COLORS][8];
extern int NoFormat;
extern int notAccurate;
extern int NotImplemented;
extern int Unsupported;
extern int MaxPalExceeded;
extern int MaxXFExceeded;
extern int MaxFormatsExceeded;
extern int MaxColExceeded;
extern int MaxRowExceeded;
extern int MaxWorksheetsExceeded;
extern int MaxStringsExceeded;
extern int MaxFontsExceeded;
extern int UnicodeStrings;
extern int CodePage;
void OutputString (uni_string *);
void output_cell (cell *, int);
int IsCellNumeric (cell *);
int IsCellSafe (cell *);
int IsCellFormula (cell *);
void output_formatted_data (uni_string *, U16, int, int);
void SetupExtraction (void);
void trim_sheet_edges (unsigned int);
void update_default_font (unsigned int);
void update_default_alignment (unsigned int, int);
#endif

View File

@ -1,38 +1,28 @@
/*! \file xml.c
\brief XML output for xlhtml
*/
/*
Copyright 2002 Charles N Wyble <jackshck@yahoo.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "xlhtml.h"
extern int first_sheet;
extern int last_sheet;
extern uni_string default_font;
extern void trim_sheet_edges(unsigned int);
extern int next_ws_title;
extern void SetupExtraction(void);
extern void update_default_font(unsigned int);
extern void OutputString(uni_string * );
extern char *lastUpdated;
extern int file_version;
extern int NoFormat;
extern int notAccurate;
extern int formula_warnings;
extern int NotImplemented;
extern int Unsupported;
extern int MaxWorksheetsExceeded;
extern int MaxRowExceeded;
extern int MaxColExceeded;
extern int MaxStringsExceeded;
extern int MaxFontsExceeded;
extern int MaxPalExceeded;
extern int MaxXFExceeded;
extern int MaxFormatsExceeded;
extern char colorTab[MAX_COLORS];
extern char filename[256];
extern int UnicodeStrings;
extern char *title;
extern void update_default_alignment(unsigned int, int);
extern void output_cell( cell *, int);
extern uni_string author;
work_sheet **ws_array;
void OutputTableXML(void)
@ -87,8 +77,8 @@ void OutputTableXML(void)
}
printf( "\t\t\t<firstrow>%ld</firstrow>\n", (unsigned long)ws_array[i]->first_row );
printf( "\t\t\t<lastrow>%ld</lastrow>\n", (int) ws_array[i]->biggest_row );
printf( "\t\t\t<firstcol>%d</firstcol>\n", (long) ws_array[i]->first_col );
printf( "\t\t\t<lastrow>%d</lastrow>\n", (int) ws_array[i]->biggest_row );
printf( "\t\t\t<firstcol>%ld</firstcol>\n", (long) ws_array[i]->first_col );
printf( "\t\t\t<lastcol>%d</lastcol>\n", (int)ws_array[i]->biggest_col );
printf( "\t\t\t<rows>\n" );