tool/dts/extract: convert regex strings to latin1

The former encoding was UTF-8, which works quite well if LC_CTYPE is ensured to be a UTF-8 codeset (e.g., en_US.UTF-8 or C.UTF-8). But if LC_CTYPE is set to C or latin1, for example, the Tcl regex library enters an infinite loop because of unexpected characters used as markers in the strings (e.g., SECTION SIGN U+00A7). Therefore, the extract tool was converted to latin1 with the following commands and now works for LC_CTYPE C and UTF-8 codesets: iconv -f utf-8 -t latin1 tool/dts/extract > /tmp/e; cp /tmp/e tool/dts/extract
2025-05-28 13:14:26 +00:00 · 2021-09-28 10:03:45 +02:00 · 2021-09-28 10:03:45 +02:00 · c0a7696c71
commit c0a7696c71
parent d85a448c52
1 changed files with 15 additions and 15 deletions
--- a/tool/dts/extract
+++ b/tool/dts/extract
@ -132,7 +132,7 @@ proc sub_token {token token_type} {
 	global tok_text

 	if {$token == ""} { return "" }
-	if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
+	if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
 		return $sub_token
 	} else {
 		return ""
@ -153,11 +153,11 @@ proc labels_referenced_by_node {token} {
 	while {$output != ""} {

 		# consume plain text
-		if {[regexp {^[^§]+} $output plain]} {
-			regsub {^[^§]+} $output "" output }
+		if {[regexp {^[^§]+} $output plain]} {
+			regsub {^[^§]+} $output "" output }

 		# consume token
-		if {[regexp {§(.+?)°} $output dummy subtoken]} {
+		if {[regexp {§(.+?)°} $output dummy subtoken]} {

 			# collect label reference
 			if {[tok_type $subtoken] == "reflabelname"} {
@ -167,7 +167,7 @@ proc labels_referenced_by_node {token} {
 			if {[tok_type $subtoken] != "node"} {
 				set result [concat $result [labels_referenced_by_node $subtoken]]
 			}
-			regsub {§(.+?)°} $output "" output
+			regsub {§(.+?)°} $output "" output
 		}
 	}
 	return [lsort -unique $result]
@ -199,7 +199,7 @@ proc collect_label_and_references_of_node {token path} {

 	set selected($path) 0

-	if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
+	if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
 		set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
 		set labels($label_name) $path
 	}
@ -247,11 +247,11 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} {
 	while {$output != ""} {

 		# consume plain text
-		if {[regexp {^[^§]+} $output plain]} {
-			regsub {^[^§]+} $output "" output }
+		if {[regexp {^[^§]+} $output plain]} {
+			regsub {^[^§]+} $output "" output }

 		# consume token
-		if {[regexp {§(.+?)°} $output dummy token]} {
+		if {[regexp {§(.+?)°} $output dummy token]} {

 			# try to enter node or nodesupplement
 			set path [sub_node_path $token $curr_path]
@ -259,7 +259,7 @@ proc collect_labels_and_references {{token content0} {curr_path ""}} {
 			if {$path != $curr_path} {
 				collect_label_and_references_of_node $token $path }

-			regsub {§(.+?)°} $output "" output
+			regsub {§(.+?)°} $output "" output
 		}
 	}
 }
@ -274,19 +274,19 @@ proc dump_selected_source {{token content0} {curr_path ""}} {
 	while {$output != ""} {

 		# consume plain text
-		if {[regexp {^[^§]+} $output plain]} {
-			regsub -all {³}  $plain "\\&" plain
+		if {[regexp {^[^§]+} $output plain]} {
+			regsub -all {³}  $plain "\\&" plain

 			if {[info exists selected($curr_path)] && $selected($curr_path)} {
 				puts -nonewline $plain }

-			regsub {^[^§]+} $output "" output
+			regsub {^[^§]+} $output "" output
 		}

 		# consume token
-		if {[regexp {§(.+?)°} $output dummy token]} {
+		if {[regexp {§(.+?)°} $output dummy token]} {
 			dump_selected_source $token [sub_node_path $token $curr_path]
-			regsub {§(.+?)°} $output "" output
+			regsub {§(.+?)°} $output "" output
 		}
 	}
 }