From ca850c787fa6b3c6820e058d8ccdcc4b37d407dc Mon Sep 17 00:00:00 2001 From: Christian Helmuth Date: Fri, 23 Aug 2019 14:33:49 +0200 Subject: [PATCH] input_filter: dead-key sequence support Issue #3483 --- .../drivers_managed-pc/input_filter.config | 4 +- repos/os/run/input_filter.run | 54 +++- repos/os/src/server/input_filter/README | 57 +++- .../src/server/input_filter/chargen_source.h | 253 ++++++++++++++---- repos/os/src/test/input_filter/main.cc | 7 +- 5 files changed, 317 insertions(+), 58 deletions(-) diff --git a/repos/gems/recipes/raw/drivers_managed-pc/input_filter.config b/repos/gems/recipes/raw/drivers_managed-pc/input_filter.config index 24d3d7a1a7..0ebf6b5fea 100644 --- a/repos/gems/recipes/raw/drivers_managed-pc/input_filter.config +++ b/repos/gems/recipes/raw/drivers_managed-pc/input_filter.config @@ -22,7 +22,6 @@ - @@ -30,6 +29,9 @@ + + + diff --git a/repos/os/run/input_filter.run b/repos/os/run/input_filter.run index fcdfbf779c..8dbddcc9aa 100644 --- a/repos/os/run/input_filter.run +++ b/repos/os/run/input_filter.run @@ -241,9 +241,9 @@ append config { - + - + @@ -268,6 +268,56 @@ append config { + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/repos/os/src/server/input_filter/README b/repos/os/src/server/input_filter/README index bd2ebaab26..32707b3914 100644 --- a/repos/os/src/server/input_filter/README +++ b/repos/os/src/server/input_filter/README @@ -78,14 +78,17 @@ sub nodes: :///: Defines which physical keys are interpreted as modifier keys. Usually, - '' corresponds to shift, '' to control, and '' to altgr - (on German keyboards). Each modifier node may host any number of '' - nodes with their corresponding 'name' attribute. For example: + '' corresponds to shift, '' to control, '' to altgr + (on German keyboards), and '' to Caps Lock. Each modifier node + may host any number of '' nodes with their corresponding 'name' + attribute. For example: ! ! - ! ! + ! + ! + ! 
The '<rom>' node incorporates the content of the ROM module of the specified name into the modifier state. If the ROM module contains a @@ -103,10 +106,48 @@ sub nodes: Each '<map>' may contain any number of '<key>' subnodes. Each '<key>' must have the key name as 'name' attribute. The to-be-emitted character - is defined by the attributes 'ascii', 'char', or 'b0/b1/b2/b3'. The - 'ascii' attribute accepts an integer value between 0 and 127, the - 'char' attribute accepts a single ASCII character, the 'b0/b1/b2/b3' - attributes define the individual bytes of an UTF-8 character. + is defined by the attributes 'ascii', 'char', 'code', or 'b0/b1/b2/b3'. + + :'ascii': accepts an integer value between 0 and 127 + :'char': accepts a single ASCII character + :'code': defines the Unicode codepoint as an integer value + :'b0'/'b1'/'b2'/'b3': define the individual bytes of a UTF-8 character + +:<sequence>: + + A sequence node permits the definition of dead-key/composing + character sequences. With such sequences the character is not + generated instantly on key press but only after the sequence is + completed. If an unfinished sequence can't be completed due to an + unmatched character, the sequence is aborted and no character is + generated. input_filter supports sequences of up to four characters. + + For example, the French AZERTY keyboard layout [1] has a dead key + for Circumflex Accent "^" right of the P key (which is bracket left + "[" on US keyboards). When Circumflex is pressed no visible + character should be generated instantly but the accent must be + combined with a follow-up character, e.g., Circumflex plus "a" + generates â. + + [1] https://docs.microsoft.com/en-us/globalization/keyboards/kbdfr.html + + Dead keys can be defined in the <key> nodes of any <map> by using + codepoints not used for direct output, for example, Combining + Diacritical Marks beginning at U+0300. The French Circumflex example + can be configured as follows. + + ! + ! + ! + ! + ! + ! + ! + ! + ! + ! + ! + !
:: diff --git a/repos/os/src/server/input_filter/chargen_source.h b/repos/os/src/server/input_filter/chargen_source.h index 33705607ac..06fc657c25 100644 --- a/repos/os/src/server/input_filter/chargen_source.h +++ b/repos/os/src/server/input_filter/chargen_source.h @@ -238,6 +238,49 @@ class Input_filter::Chargen_source : public Source, Source::Sink } }; + struct Missing_character_definition { }; + + /** + * Return Unicode codepoint defined in XML node attributes + * + * \throw Missing_character_definition + */ + static Codepoint _codepoint_from_xml_node(Xml_node node) + { + if (node.has_attribute("ascii")) + return Codepoint { node.attribute_value("ascii", 0) }; + + if (node.has_attribute("code")) + return Codepoint { node.attribute_value("code", 0) }; + + if (node.has_attribute("char")) { + + typedef String<2> Value; + Value value = node.attribute_value("char", Value()); + + unsigned char const ascii = value.string()[0]; + + if (ascii < 128) + return Codepoint { ascii }; + + warning("char attribute with non-ascii character " + "'", value, "'"); + throw Missing_character_definition(); + } + + if (node.has_attribute("b0")) { + char const b0 = node.attribute_value("b0", 0L), + b1 = node.attribute_value("b1", 0L), + b2 = node.attribute_value("b2", 0L), + b3 = node.attribute_value("b3", 0L); + + char const buf[5] { b0, b1, b2, b3, 0 }; + return Utf8_ptr(buf).codepoint(); + } + + throw Missing_character_definition(); + } + /** * Map of the states of the physical keys */ @@ -286,49 +329,6 @@ class Input_filter::Chargen_source : public Source, Source::Sink : Key::Rule::Conditions::Modifier::RELEASED; } - struct Missing_character_definition { }; - - /** - * Return UTF8 character defined in XML node attributes - * - * \throw Missing_character_definition - */ - static Codepoint _codepoint_from_xml_node(Xml_node node) - { - if (node.has_attribute("ascii")) - return Codepoint { node.attribute_value("ascii", 0) }; - - if (node.has_attribute("code")) - return Codepoint { 
node.attribute_value("code", 0) }; - - if (node.has_attribute("char")) { - - typedef String<2> Value; - Value value = node.attribute_value("char", Value()); - - unsigned char const ascii = value.string()[0]; - - if (ascii < 128) - return Codepoint { ascii }; - - warning("char attribute with non-ascii character " - "'", value, "'"); - throw Missing_character_definition(); - } - - if (node.has_attribute("b0")) { - char const b0 = node.attribute_value("b0", 0L), - b1 = node.attribute_value("b1", 0L), - b2 = node.attribute_value("b2", 0L), - b3 = node.attribute_value("b3", 0L); - - char const buf[5] { b0, b1, b2, b3, 0 }; - return Utf8_ptr(buf).codepoint(); - } - - throw Missing_character_definition(); - } - void import_map(Xml_node map) { /* obtain modifier conditions from map attributes */ @@ -368,6 +368,150 @@ class Input_filter::Chargen_source : public Source, Source::Sink mod_rom.enabled(); }); } + /** + * Generate characters from codepoint sequences + */ + class Sequencer + { + private: + + Allocator &_alloc; + + struct Sequence + { + Codepoint seq[4] { Codepoint::INVALID, Codepoint::INVALID, + Codepoint::INVALID, Codepoint::INVALID }; + + unsigned len { 0 }; + + enum Match { MISMATCH , UNFINISHED, COMPLETED }; + + Sequence() { } + + Sequence(Codepoint c0, Codepoint c1, Codepoint c2, Codepoint c3) + : seq { c0, c1, c2, c3 }, len { 4 } { } + + void append(Codepoint c) + { + /* excess codepoints are just dropped */ + if (len < 4) + seq[len++] = c; + } + + /** + * Match 'other' to 'this' until first invalid codepoint in + * 'other', completion, or mismatch + */ + Match match(Sequence const &o) const + { + /* first codepoint must match */ + if (o.seq[0].value != seq[0].value) return MISMATCH; + + for (unsigned i = 1; i < sizeof(seq)/sizeof(*seq); ++i) { + /* end of this sequence means COMPLETED */ + if (!seq[i].valid()) break; + + /* end of other sequence means UNFINISHED */ + if (!o.seq[i].valid()) return UNFINISHED; + + if (o.seq[i].value != seq[i].value) return 
MISMATCH; + + /* continue until completion with both valid and equal */ + } + return COMPLETED; + } + }; + + struct Rule + { + typedef Sequence::Match Match; + + Registry::Element element; + Sequence const sequence; + Codepoint const code; + + Rule(Registry ®istry, Sequence const &sequence, Codepoint code) + : + element(registry, *this), + sequence(sequence), + code(code) + { } + }; + + Registry _rules { }; + + Sequence _curr_sequence { }; + + public: + + Sequencer(Allocator &alloc) : _alloc(alloc) { } + + ~Sequencer() + { + _rules.for_each([&] (Rule &rule) { + destroy(_alloc, &rule); }); + } + + void import_sequence(Xml_node node) + { + unsigned const invalid { Codepoint::INVALID }; + + Sequence sequence { + Codepoint { node.attribute_value("first", invalid) }, + Codepoint { node.attribute_value("second", invalid) }, + Codepoint { node.attribute_value("third", invalid) }, + Codepoint { node.attribute_value("fourth", invalid) } }; + + new (_alloc) Rule(_rules, sequence, _codepoint_from_xml_node(node)); + } + + Codepoint process(Codepoint codepoint) + { + Codepoint const invalid { Codepoint::INVALID }; + Rule::Match best_match { Sequence::MISMATCH }; + Codepoint result { codepoint }; + Sequence seq { _curr_sequence }; + + seq.append(codepoint); + + _rules.for_each([&] (Rule const &rule) { + /* early return if completed match was found already */ + if (best_match == Sequence::COMPLETED) return; + + Rule::Match const match { rule.sequence.match(seq) }; + switch (match) { + case Sequence::MISMATCH: + return; + case Sequence::UNFINISHED: + best_match = match; + result = invalid; + return; + case Sequence::COMPLETED: + best_match = match; + result = rule.code; + return; + } + }); + + switch (best_match) { + case Sequence::MISMATCH: + /* drop cancellation codepoint of unfinished sequence */ + if (_curr_sequence.len > 0) + result = invalid; + _curr_sequence = Sequence(); + break; + case Sequence::UNFINISHED: + _curr_sequence = seq; + break; + case Sequence::COMPLETED: + 
_curr_sequence = Sequence(); + break; + } + + return result; + } + } _sequencer; + Owner _owner; Source::Sink &_destination; @@ -442,6 +586,8 @@ class Input_filter::Chargen_source : public Source, Source::Sink /* supplement codepoint information to press event */ key.apply_best_matching_rule(_mod_map, [&] (Codepoint codepoint) { + codepoint = _sequencer.process(codepoint); + ev = Event(Input::Press_char{keycode, codepoint}); if (_char_repeater.constructed()) @@ -499,8 +645,24 @@ class Input_filter::Chargen_source : public Source, Source::Sink * Handle map nodes */ if (node.type() == "map") { - _key_map.import_map(node); - return; + try { + _key_map.import_map(node); + return; + } + catch (Missing_character_definition) { + throw Invalid_config(); } + } + + /* + * Handle sequence nodes + */ + if (node.type() == "sequence") { + try { + _sequencer.import_sequence(node); + return; + } + catch (Missing_character_definition) { + throw Invalid_config(); } } /* @@ -555,6 +717,7 @@ class Input_filter::Chargen_source : public Source, Source::Sink _timer_accessor(timer_accessor), _include_accessor(include_accessor), _key_map(_alloc), + _sequencer(_alloc), _owner(factory), _destination(destination), _source(factory.create_source(_owner, input_sub_node(config), *this)) diff --git a/repos/os/src/test/input_filter/main.cc b/repos/os/src/test/input_filter/main.cc index 2030cfc08a..284a5cfa96 100644 --- a/repos/os/src/test/input_filter/main.cc +++ b/repos/os/src/test/input_filter/main.cc @@ -375,11 +375,14 @@ struct Test::Main : Input_from_filter::Event_handler ev.handle_press([&] (Input::Keycode key, Codepoint codepoint) { auto codepoint_of_step = [] (Xml_node step) { - return Utf8_ptr(step.attribute_value("char", Value()).string()).codepoint(); }; + if (step.has_attribute("codepoint")) + return Codepoint { step.attribute_value("codepoint", 0U) }; + return Utf8_ptr(step.attribute_value("char", Value()).string()).codepoint(); + }; if (step.type() == "expect_press" && 
step.attribute_value("code", Value()) == Input::key_name(key) - && (!step.has_attribute("char") || + && ((!step.has_attribute("char") && !step.has_attribute("codepoint")) || codepoint_of_step(step).value == codepoint.value)) step_succeeded = true; });