xml_node: support backslash as attribute value

XML allows attribute values like <node attr="\"/>. The XML parser
wrongly reports this case as 'Invalid_syntax'. This behavior stems from
the implicit use of the 'end_of_quote' function, which interprets the
sequence '\"' as an escaped '"' rather than as the end of a quoted string.

The patch solves this problem by making 'end_of_quote' part of the
tokenizer's scanner policy, so that the XML parser can supply a policy
that does not treat the backslash as an escape character.

The patch removes the 'end_of_quote' function from 'util/string.h'
because it is not universal, and to avoid the ambiguity with
'SCANNER_POLICY::end_of_quote'.

Fixes #4431
This commit is contained in:
Norman Feske 2022-02-17 12:17:19 +01:00
parent 494f881f27
commit c2efa5406e
10 changed files with 58 additions and 47 deletions

View File

@ -512,15 +512,6 @@ namespace Genode {
}
/**
 * Detect the closing quotation mark of a quoted string
 *
 * Returns true if 's[1]' is a '"' character and 's[0]' is not a
 * backslash, i.e., the quotation mark is not escaped.
 */
inline bool end_of_quote(const char *s)
{
    bool const escaped = (s[0] == '\\');

    return !escaped && (s[1] == '\"');
}
/**
* Unpack quoted string
*
@ -537,6 +528,9 @@ namespace Genode {
src++;
auto end_of_quote = [] (const char *s) {
return s[0] != '\\' && s[1] == '\"'; };
size_t i = 0;
for (; *src && !end_of_quote(src - 1) && (i < dst_len - 1); i++) {

View File

@ -40,6 +40,19 @@ struct Genode::Scanner_policy_identifier_with_underline
*/
static bool identifier_char(char c, unsigned i)
{
    /* letters and underscores are accepted at any position */
    if (is_letter(c) || c == '_')
        return true;

    /* digits are accepted everywhere except at position 0 */
    return (i != 0) && is_digit(c);
}
/**
 * Check for end of quotation
 *
 * The end of a quoted string is reached when encountering a '"'
 * character that is not preceded by a backslash.
 *
 * \param s  pointer to null-terminated string with at least one
 *           character, 's[1]' is the character tested for being
 *           the closing quotation mark
 *
 * \return   true if 's[1]' is a '"' and 's[0]' is not a backslash
 */
static bool end_of_quote(const char *s)
{
    /* a backslash at 's[0]' escapes whatever follows */
    if (s[0] == '\\')
        return false;

    return s[1] == '\"';
}
};
@ -184,7 +197,7 @@ class Genode::Token
* Hence, the upper bound of the index is max_len - 2.
*/
unsigned i = 0;
for (; i + 1 < max_len && !end_of_quote(&_start[i]); i++)
for (; i + 1 < max_len && !SCANNER_POLICY::end_of_quote(&_start[i]); i++)
/* string ends without final quotation mark? too bad! */
if (!_start[i]) return 0;

View File

@ -35,13 +35,17 @@ class Genode::Xml_attribute
{
private:
/**
* Scanner policy that accepts hyphens in identifiers
*/
struct Scanner_policy_xml_identifier {
static bool identifier_char(char c, unsigned i) {
struct Scanner_policy_xml_identifier
{
static bool identifier_char(char c, unsigned i)
{
/* accepts hyphens in identifiers */
return is_letter(c) || c == '_' || c == ':'
|| (i && (c == '-' || c == '.' || is_digit(c))); } };
|| (i && (c == '-' || c == '.' || is_digit(c)));
}
/*
 * The character at 's[0]' is not inspected: a '"' at 's[1]' ends the
 * quoted string even if it is preceded by a backslash, so that an
 * attribute value like "\" is accepted.
 */
static bool end_of_quote(const char *s)
{
    char const next = s[1];

    return next == '\"';
}
};
/**
* Define tokenizer that matches XML tags (with hyphens) as identifiers

View File

@ -96,6 +96,9 @@
[init -> test-xml_node] XML node: name = "visible-tag"
[init -> test-xml_node] XML node: name = "visible-tag"
[init -> test-xml_node]
[init -> test-xml_node] -- Test backslash as attribute value --
[init -> test-xml_node] attribute value: '\'
[init -> test-xml_node]
[init -> test-xml_node] -- Test exporting decoded content from XML node --
[init -> test-xml_node] step 1
[init -> test-xml_node] step 2

View File

@ -148,6 +148,10 @@ static const char *xml_test_comments =
"<visible-tag/>"
"</config>";
/*
 * Attribute value that consists of a single backslash, i.e., the XML
 * text <config attr="\"/>. The backslash directly precedes the closing
 * quotation mark of the attribute value.
 */
static const char *xml_test_backslash =
"<config attr=\"\\\"/>";
/******************
** Test program **
@ -405,6 +409,12 @@ void Component::construct(Genode::Env &env)
log("-- Test parsing XML with comments --");
log_xml_info(xml_test_comments);
log("-- Test backslash as attribute value --");
{
Xml_node const node(xml_test_backslash);
log("attribute value: '", node.attribute_value("attr", String<10>()), "'\n");
}
log("-- Test exporting decoded content from XML node --");
test_decoded_content<~0UL>(env, 1, xml_test_comments, 8, 119);
test_decoded_content<119 >(env, 2, xml_test_comments, 8, 119);

View File

@ -96,19 +96,8 @@ static Absolute_path &cwd()
return _cwd_ptr->cwd();
}
/**
* path element token
*/
struct Scanner_policy_path_element
{
    /* a path element consists of any characters except '/' and the null terminator */
    static bool identifier_char(char c, unsigned /* i */)
    {
        return !(c == '/' || c == 0);
    }
};
typedef Token<Scanner_policy_path_element> Path_element_token;
typedef Token<Vfs::Scanner_policy_path_element> Path_element_token;
/**

View File

@ -72,17 +72,7 @@ void Libc::Kernel::reset_malloc_heap()
void Libc::Kernel::_init_file_descriptors()
{
/**
* path element token
*/
struct Scanner_policy_path_element
{
    /* accept every character up to the next '/' or the end of the string */
    static bool identifier_char(char c, unsigned /* i */)
    {
        switch (c) {
        case '/':
        case '\0':
            return false;
        default:
            return true;
        }
    }
};
typedef Genode::Token<Scanner_policy_path_element> Path_element_token;
typedef Genode::Token<Vfs::Scanner_policy_path_element> Path_element_token;
auto resolve_symlinks = [&] (Absolute_path next_iteration_working_path, Absolute_path &resolved_path)
{

View File

@ -91,6 +91,19 @@ namespace Vfs {
};
typedef Genode::Path<MAX_PATH_LEN> Absolute_path;
/**
 * Scanner policy for path elements
 */
struct Scanner_policy_path_element
{
    /* every character except '/' and the null terminator is part of the element */
    static bool identifier_char(char c, unsigned /* i */)
    {
        if (c == '/')
            return false;

        return c != 0;
    }

    /* a '"' at 's[1]' ends a quoted string unless 's[0]' is a backslash */
    static bool end_of_quote(const char *s)
    {
        bool const escaped = (s[0] == '\\');

        return !escaped && (s[1] == '\"');
    }
};
}
#endif /* _INCLUDE__VFS__TYPES_H_ */

View File

@ -94,6 +94,9 @@ struct Scanner_policy_number
{
/* accept a character if 'Genode::is_digit' holds for it and it is not a '.' */
static bool identifier_char(char c, unsigned)
{
    bool const digit = Genode::is_digit(c);

    return digit && (c != '.');
}
/* a '"' at 's[1]' ends a quoted string unless 's[0]' is a backslash */
static bool end_of_quote(const char *s)
{
    if (s[0] == '\\')
        return false;

    return s[1] == '\"';
}
};

View File

@ -287,14 +287,6 @@ class Vfs::Tar_file_system : public File_system
}
};
struct Scanner_policy_path_element
{
    /* a path element ends at the next '/' or at the null terminator */
    static bool identifier_char(char c, unsigned /* i */)
    {
        bool const separator  = (c == '/');
        bool const terminator = (c == 0);

        return !separator && !terminator;
    }
};
typedef Genode::Token<Scanner_policy_path_element> Path_element_token;