From c2efa5406e8e6ddd306388575e4d82781e198b80 Mon Sep 17 00:00:00 2001 From: Norman Feske Date: Thu, 17 Feb 2022 12:17:19 +0100 Subject: [PATCH] xml_node: support backslash as attribute value XML allows attribute values like attr="\". The XML parser wrongly reflects this case as 'Invalid_syntax'. This behavior stems from the implicit use of the 'end_of_quote' function, which considers the sequence of '\"' as a quoted '"' rather than the end of a quoted string. The patch solves this problem by making the 'end_of_quote' part of the tokenizer's scanner policy. The patch removes the 'end_of_quote' function from 'util/string.h' because it is not universal, and to avoid the ambiguity with 'SCANNER_POLICY::end_of_quote'. Fixes #4431 --- repos/base/include/util/string.h | 12 +++--------- repos/base/include/util/token.h | 15 ++++++++++++++- repos/base/include/util/xml_node.h | 16 ++++++++++------ repos/base/recipes/pkg/test-xml_node/runtime | 3 +++ repos/base/src/test/xml_node/test.cc | 10 ++++++++++ repos/libports/src/lib/libc/file_operations.cc | 13 +------------ repos/libports/src/lib/libc/kernel.cc | 12 +----------- repos/os/include/vfs/types.h | 13 +++++++++++++ repos/os/src/lib/net/ipv4.cc | 3 +++ repos/os/src/lib/vfs/tar_file_system.h | 8 -------- 10 files changed, 58 insertions(+), 47 deletions(-) diff --git a/repos/base/include/util/string.h b/repos/base/include/util/string.h index 3726005c26..4ab462fc48 100644 --- a/repos/base/include/util/string.h +++ b/repos/base/include/util/string.h @@ -512,15 +512,6 @@ namespace Genode { } - /** - * Check for end of quotation - * - * Checks if next character is non-backslashed quotation mark.
- */ - inline bool end_of_quote(const char *s) { - return s[0] != '\\' && s[1] == '\"'; } - - /** * Unpack quoted string * @@ -537,6 +528,9 @@ namespace Genode { src++; + auto end_of_quote = [] (const char *s) { + return s[0] != '\\' && s[1] == '\"'; }; + size_t i = 0; for (; *src && !end_of_quote(src - 1) && (i < dst_len - 1); i++) { diff --git a/repos/base/include/util/token.h b/repos/base/include/util/token.h index 44982342b1..e67d6e633d 100644 --- a/repos/base/include/util/token.h +++ b/repos/base/include/util/token.h @@ -40,6 +40,19 @@ struct Genode::Scanner_policy_identifier_with_underline */ static bool identifier_char(char c, unsigned i) { return is_letter(c) || (c == '_') || (i && is_digit(c)); } + + /** + * Check for end of quotation + * + * Checks if next character is non-backslashed quotation mark. + * The end of a quoted string is reached when encountering a '"' + * character that is not preceded by a backslash. + * + * \param s pointer to null-terminated string with at least one + * character + */ + static bool end_of_quote(const char *s) { + return s[0] != '\\' && s[1] == '\"'; } }; @@ -184,7 +197,7 @@ class Genode::Token * Hence, the upper bound of the index is max_len - 2. */ unsigned i = 0; - for (; i + 1 < max_len && !end_of_quote(&_start[i]); i++) + for (; i + 1 < max_len && !SCANNER_POLICY::end_of_quote(&_start[i]); i++) /* string ends without final quotation mark? too bad! 
*/ if (!_start[i]) return 0; diff --git a/repos/base/include/util/xml_node.h b/repos/base/include/util/xml_node.h index 56e1900352..fede99d2b9 100644 --- a/repos/base/include/util/xml_node.h +++ b/repos/base/include/util/xml_node.h @@ -35,13 +35,17 @@ class Genode::Xml_attribute { private: - /** - * Scanner policy that accepts hyphens in identifiers - */ - struct Scanner_policy_xml_identifier { - static bool identifier_char(char c, unsigned i) { + struct Scanner_policy_xml_identifier + { + static bool identifier_char(char c, unsigned i) + { + /* accepts hyphens in identifiers */ return is_letter(c) || c == '_' || c == ':' - || (i && (c == '-' || c == '.' || is_digit(c))); } }; + || (i && (c == '-' || c == '.' || is_digit(c))); + } + + static bool end_of_quote(const char *s) { return s[1] == '\"'; } + }; /** * Define tokenizer that matches XML tags (with hyphens) as identifiers diff --git a/repos/base/recipes/pkg/test-xml_node/runtime b/repos/base/recipes/pkg/test-xml_node/runtime index dc9d024687..080a24193b 100644 --- a/repos/base/recipes/pkg/test-xml_node/runtime +++ b/repos/base/recipes/pkg/test-xml_node/runtime @@ -96,6 +96,9 @@ [init -> test-xml_node] XML node: name = "visible-tag" [init -> test-xml_node] XML node: name = "visible-tag" [init -> test-xml_node] + [init -> test-xml_node] -- Test backslash as attribute value -- + [init -> test-xml_node] attribute value: '\' + [init -> test-xml_node] [init -> test-xml_node] -- Test exporting decoded content from XML node -- [init -> test-xml_node] step 1 [init -> test-xml_node] step 2 diff --git a/repos/base/src/test/xml_node/test.cc b/repos/base/src/test/xml_node/test.cc index b0bbae8410..494cdabd6b 100644 --- a/repos/base/src/test/xml_node/test.cc +++ b/repos/base/src/test/xml_node/test.cc @@ -148,6 +148,10 @@ static const char *xml_test_comments = "" ""; +/* backslash in attribute */ +static const char *xml_test_backslash = + ""; + /****************** ** Test program ** @@ -405,6 +409,12 @@ void 
Component::construct(Genode::Env &env) log("-- Test parsing XML with comments --"); log_xml_info(xml_test_comments); + log("-- Test backslash as attribute value --"); + { + Xml_node const node(xml_test_backslash); + log("attribute value: '", node.attribute_value("attr", String<10>()), "'\n"); + } + log("-- Test exporting decoded content from XML node --"); test_decoded_content<~0UL>(env, 1, xml_test_comments, 8, 119); test_decoded_content<119 >(env, 2, xml_test_comments, 8, 119); diff --git a/repos/libports/src/lib/libc/file_operations.cc b/repos/libports/src/lib/libc/file_operations.cc index 82793c5b79..5cf4e7351b 100644 --- a/repos/libports/src/lib/libc/file_operations.cc +++ b/repos/libports/src/lib/libc/file_operations.cc @@ -96,19 +96,8 @@ static Absolute_path &cwd() return _cwd_ptr->cwd(); } -/** - * path element token - */ -struct Scanner_policy_path_element -{ - static bool identifier_char(char c, unsigned /* i */) - { - return (c != '/') && (c != 0); - } -}; - -typedef Token Path_element_token; +typedef Token Path_element_token; /** diff --git a/repos/libports/src/lib/libc/kernel.cc b/repos/libports/src/lib/libc/kernel.cc index ff5db0bed8..dc8d4b9e9b 100644 --- a/repos/libports/src/lib/libc/kernel.cc +++ b/repos/libports/src/lib/libc/kernel.cc @@ -72,17 +72,7 @@ void Libc::Kernel::reset_malloc_heap() void Libc::Kernel::_init_file_descriptors() { - /** - * path element token - */ - struct Scanner_policy_path_element - { - static bool identifier_char(char c, unsigned /* i */) - { - return (c != '/') && (c != 0); - } - }; - typedef Genode::Token Path_element_token; + typedef Genode::Token Path_element_token; auto resolve_symlinks = [&] (Absolute_path next_iteration_working_path, Absolute_path &resolved_path) { diff --git a/repos/os/include/vfs/types.h b/repos/os/include/vfs/types.h index 51ba3c3210..d16ff3279d 100644 --- a/repos/os/include/vfs/types.h +++ b/repos/os/include/vfs/types.h @@ -91,6 +91,19 @@ namespace Vfs { }; typedef Genode::Path Absolute_path; 
+ + struct Scanner_policy_path_element + { + static bool identifier_char(char c, unsigned /* i */) + { + return (c != '/') && (c != 0); + } + + static bool end_of_quote(const char *s) + { + return s[0] != '\\' && s[1] == '\"'; + } + }; } #endif /* _INCLUDE__VFS__TYPES_H_ */ diff --git a/repos/os/src/lib/net/ipv4.cc b/repos/os/src/lib/net/ipv4.cc index 25f9ca555c..e4383c54c8 100644 --- a/repos/os/src/lib/net/ipv4.cc +++ b/repos/os/src/lib/net/ipv4.cc @@ -94,6 +94,9 @@ struct Scanner_policy_number { static bool identifier_char(char c, unsigned) { return Genode::is_digit(c) && c !='.'; } + + static bool end_of_quote(const char *s) { + return s[0] != '\\' && s[1] == '\"'; } }; diff --git a/repos/os/src/lib/vfs/tar_file_system.h b/repos/os/src/lib/vfs/tar_file_system.h index a06f3e781a..822022d6b0 100644 --- a/repos/os/src/lib/vfs/tar_file_system.h +++ b/repos/os/src/lib/vfs/tar_file_system.h @@ -287,14 +287,6 @@ class Vfs::Tar_file_system : public File_system } }; - struct Scanner_policy_path_element - { - static bool identifier_char(char c, unsigned /* i */) - { - return (c != '/') && (c != 0); - } - }; - typedef Genode::Token Path_element_token;