tool/dts/extract: convert regex strings to latin1

The former encoding was UTF-8, which works quite well if LC_CTYPE is
ensured to be a UTF-8 codeset (e.g., en_US.UTF-8 or C.UTF-8). But, if
LC_CTYPE is set to C or latin1 for example, the Tcl regex library enters
an infinite loop because of unexpected characters used as markers
in the strings (e.g., SECTION SIGN U+00A7).

Therefore, the extract tool was converted to latin1 with the following
commands and now works for LC_CTYPE C and UTF-8 codesets.

   iconv -f utf-8 -t latin1 tool/dts/extract > /tmp/e
   cp /tmp/e tool/dts/extract
This commit is contained in:
Christian Helmuth 2021-09-28 10:03:45 +02:00 committed by Norman Feske
parent d85a448c52
commit c0a7696c71

View File

@ -132,7 +132,7 @@ proc sub_token {token token_type} {
global tok_text
if {$token == ""} { return "" }
if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
return $sub_token
} else {
return ""
@ -153,11 +153,11 @@ proc labels_referenced_by_node {token} {
while {$output != ""} {
# consume plain text
if {[regexp {^[^§]+} $output plain]} {
regsub {^[^§]+} $output "" output }
if {[regexp {^[^§]+} $output plain]} {
regsub {^[^§]+} $output "" output }
# consume token
if {[regexp {§(.+?)°} $output dummy subtoken]} {
if {[regexp {§(.+?)°} $output dummy subtoken]} {
# collect label reference
if {[tok_type $subtoken] == "reflabelname"} {
@ -167,7 +167,7 @@ proc labels_referenced_by_node {token} {
if {[tok_type $subtoken] != "node"} {
set result [concat $result [labels_referenced_by_node $subtoken]]
}
regsub {§(.+?)°} $output "" output
regsub {§(.+?)°} $output "" output
}
}
return [lsort -unique $result]
@ -199,7 +199,7 @@ proc collect_label_and_references_of_node {token path} {
set selected($path) 0
if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
set labels($label_name) $path
}
@ -247,11 +247,11 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} {
while {$output != ""} {
# consume plain text
if {[regexp {^[^§]+} $output plain]} {
regsub {^[^§]+} $output "" output }
if {[regexp {^[^§]+} $output plain]} {
regsub {^[^§]+} $output "" output }
# consume token
if {[regexp {§(.+?)°} $output dummy token]} {
if {[regexp {§(.+?)°} $output dummy token]} {
# try to enter node or nodesupplement
set path [sub_node_path $token $curr_path]
@ -259,7 +259,7 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} {
if {$path != $curr_path} {
collect_label_and_references_of_node $token $path }
regsub {§(.+?)°} $output "" output
regsub {§(.+?)°} $output "" output
}
}
}
@ -274,19 +274,19 @@ proc dump_selected_source {{token content0} {curr_path ""}} {
while {$output != ""} {
# consume plain text
if {[regexp {^[^§]+} $output plain]} {
regsub -all {³} $plain "\\&" plain
if {[regexp {^[^§]+} $output plain]} {
regsub -all {³} $plain "\\&" plain
if {[info exists selected($curr_path)] && $selected($curr_path)} {
puts -nonewline $plain }
regsub {^[^§]+} $output "" output
regsub {^[^§]+} $output "" output
}
# consume token
if {[regexp {§(.+?)°} $output dummy token]} {
if {[regexp {§(.+?)°} $output dummy token]} {
dump_selected_source $token [sub_node_path $token $curr_path]
regsub {§(.+?)°} $output "" output
regsub {§(.+?)°} $output "" output
}
}
}