tool/dts: for extracting content from dts files

The new tool eases the inspection and pruning of device-tree source files. Fixes #4165
2025-01-30 08:03:59 +00:00 · 2021-05-19 15:59:33 +02:00 · 2021-05-19 15:59:33 +02:00 · a13dee8d19
commit a13dee8d19
parent 0069660958
3 changed files with 757 additions and 0 deletions
--- a/tool/dts/README
+++ b/tool/dts/README
@ -0,0 +1,26 @@
       Utilities for extracting information from device-tree sources
 Device trees are prominently used in the Linux world for describing the
 structure of SoCs as for the parametrisation of device drivers. The utilities
 found in this directory ease the extraction of interesting information from
 device-tree files.
 The utilities found in this directory operate on device-tree source (dts)
 files as opposed to device-tree binaries (dtb) because dts files contain
 information about the inter-dependencies between nodes in textual form
 (labels) instead of mere numbers (phandles).
 In the Linux source tree, dts files can be found at _arch/<arch>/boot/dts/_.
 Most dts files leverage the C preprocessor to include fragments in the form of
 dtsi files. To generate the complete device-tree source information for a
 given dts file, the C preprocessor must be invoked as follows:
 ! cpp -Iinclude -x assembler-with-cpp -P <dts-file>
 The _parse_ tool generates a syntax tree from a dts file. It is not meant to
 be invoked directly but it is used as a helper by other tools.
 The _extract_ tool can be used to query structural information from a dts
 file and to prune a device tree specifically for a subset of devices. For
 usage information, execute the tool without arguments.
--- a/tool/dts/extract
+++ b/tool/dts/extract
@ -0,0 +1,470 @@
 #!/usr/bin/tclsh
 #
 # \brief  Extract subtree from device-tree source
 # \author Norman Feske
 # \date   2021-05-17
 #
 ##
 # Return true if command-line switch was specified
 #
 proc get_cmd_switch { arg_name } {
 	global argv
 	return [expr [lsearch $argv $arg_name] >= 0]
 }
 ##
 # Return command-line argument value
 #
 # If a argument name is specified multiple times, a
 # list of argument values is returned.
 #
 proc get_cmd_arg { arg_name default_value } {
 	global argv
 	# find argument name in argv list
 	set arg_idx_list [lsearch -all $argv $arg_name]
 	if {[llength $arg_idx_list] == 0} { return $default_value }
 	set result {}
 	foreach arg_idx $arg_idx_list {
 		set next_idx [expr $arg_idx + 1]
 		# stop if argv ends with the argument name
 		if {$next_idx >= [llength $argv]} continue
 		# return list element following the argument name
 		lappend result [lindex $argv $next_idx]
 	}
 	# if argument occurred only once, return its value
 	if {[llength $result] == 1} { return [lindex $result 0] }
 	# if argument occurred multiple times, contain list of arguments
 	return $result
 }
 #################################################
 ## Read input and fill internal representation ##
 #################################################
 ##
 # Find location of 'parse'
 #
 # We expect to find 'parse' in the same directory as we are located. The
 # question is: Where are we?
 ##
 proc parse_file { } {
 	global argv0
 	set path $argv0
 	if {[file type $path] == "link"} {
 		set path [file readlink $path] }
 	set parse_file "[file dirname $path]/parse"
 	if {![file exists $parse_file]} {
 		puts stderr "Error: Could not find 'parse' in '$path'."
 		exit -1
 	}
 	return $parse_file
 }
 set input_source [lindex $argv end]
 if {[catch {
 		set tokens [exec [parse_file] $input_source]
 }]} {
 	foreach line {
 	        ""
 	        "Extract subtree from device-tree source"
 	        "\n  usage: extract \[options\] <dts_file>"
 	        ""
 	        "Supported options are:"
 	        ""
 	        " --nodes           Print list of DTS nodes"
 	        " --labels          Print list of labels with their DTS paths"
 	        " --references      Print referenced labels for each DTS node"
 	        " --dot-graph       Generate dot file for graphviz"
 	        " --select <label>  Print DTS related to specified label,"
 	        "                   can be specified multiple times"
 	        ""
 	        "Alternatively to specifying a dts file as argument, the input"
 	        "data can be passed via standard input by specifying '-' as"
 	        "file name."
 	        ""
 	        "The result of the conversion will be written to stdout."
 	        ""
 	} { puts stderr $line }
 	exit -1;
 }
 foreach token $tokens {
 	set name [lindex $token 0]
 	set line [lindex $token 1]
 	set text [lindex $token 2]
 	set tok_text($name) "$text"
 	set tok_line($name) $line
 }
 if {![info exists tok_text(content0)]} {
 	puts stderr "Error: input contains no root token 'content0'."
 	exit -1
 }
 proc tok_type {token} {
 	regexp {[a-z]+} $token type
 	return $type
 }
 ##
 # Find and return a sub token of the specified type
 #
 proc sub_token {token token_type} {
 	global tok_text
 	if {$token == ""} { return "" }
 	if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
 		return $sub_token
 	} else {
 		return ""
 	}
 }
 ##
 # Return label references present at the specified node of the syntax tree
 #
 proc labels_referenced_by_node {token} {
 	global tok_text
 	set output $tok_text($token)
 	set result {}
 	while {$output != ""} {
 		# consume plain text
 		if {[regexp {^[^§]+} $output plain]} {
 			regsub {^[^§]+} $output "" output }
 		# consume token
 		if {[regexp {§(.+?)°} $output dummy subtoken]} {
 			# collect label reference
 			if {[tok_type $subtoken] == "reflabelname"} {
 				lappend result $tok_text($subtoken) }
 			# search tree for label references but don't traverse sub nodes
 			if {[tok_type $subtoken] != "node"} {
 				set result [concat $result [labels_referenced_by_node $subtoken]]
 			}
 			regsub {§(.+?)°} $output "" output
 		}
 	}
 	return [lsort -unique $result]
 }
 proc joined_path {curr_path path_elem} {
 	if {$curr_path == "" && $path_elem == "/"} {
 		return "/" }
 	if {$curr_path == "/"} {
 		return "/$path_elem" }
 	return "$curr_path/$path_elem"
 }
 proc collect_label_and_references_of_node {token path} {
 	global tok_text
 	global labels
 	global references
 	global selected
 	set node_text $tok_text($token)
 	foreach ref [labels_referenced_by_node $token] {
 		lappend references($path) $ref }
 	set selected($path) 0
 	if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
 		set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
 		set labels($label_name) $path
 	}
 	collect_labels_and_references [sub_token $token block] $path
 }
 ##
 # Return path of sub node if token refers to a node or nodesupplement
 #
 # Otherwise the path is returned unmodified
 #
 proc sub_node_path {token path} {
 	global tok_text
 	global labels
 	if {[tok_type $token] == "node"} {
 		set path_elem $tok_text([sub_token $token nodename])
 		set path [joined_path $path $path_elem]
 	}
 	if {[tok_type $token] == "nodesupplement"} {
 		set label_name $tok_text([sub_token [sub_token $token reflabel] reflabelname])
 		if {[info exists labels($label_name)]} {
 			set path $labels($label_name) }
 	}
 	return $path
 }
 ##
 # Populate the global 'labels' and 'references' arrays
 #
 # This function also initializes the 'selected' array with a key for
 # each path found and the value to 0. So the existing paths can be
 # queried by iterating over the 'selected' array names.
 #
 proc collect_labels_and_references {{token content0} {curr_path ""}} {
 	global tok_text
 	set output $tok_text($token)
 	while {$output != ""} {
 		# consume plain text
 		if {[regexp {^[^§]+} $output plain]} {
 			regsub {^[^§]+} $output "" output }
 		# consume token
 		if {[regexp {§(.+?)°} $output dummy token]} {
 			# try to enter node or nodesupplement
 			set path [sub_node_path $token $curr_path]
 			if {$path != $curr_path} {
 				collect_label_and_references_of_node $token $path }
 			regsub {§(.+?)°} $output "" output
 		}
 	}
 }
 proc dump_selected_source {{token content0} {curr_path ""}} {
 	global tok_text
 	global selected
 	set output $tok_text($token)
 	while {$output != ""} {
 		# consume plain text
 		if {[regexp {^[^§]+} $output plain]} {
 			regsub -all {³}  $plain "\\&" plain
 			if {[info exists selected($curr_path)] && $selected($curr_path)} {
 				puts -nonewline $plain }
 			regsub {^[^§]+} $output "" output
 		}
 		# consume token
 		if {[regexp {§(.+?)°} $output dummy token]} {
 			dump_selected_source $token [sub_node_path $token $curr_path]
 			regsub {§(.+?)°} $output "" output
 		}
 	}
 }
 proc children {parent_path} {
 	global selected
 	set result {}
 	foreach path [array names selected] {
 		if {[string match "$parent_path*" $path]} {
 			lappend result $path } }
 	return $result
 }
 ##
 # Return most specific label for the given node
 #
 proc label_of_path {path} {
 	global labels
 	set longest_match 0
 	set label ""
 	foreach l [array names labels] {
 		if {$labels($l) == $path} {
 			return $l }
 		if {[string match "$labels($l)*" $path]} {
 			set len [string length $l]
 			if {$len > $longest_match} {
 				set longest_match $len
 				set label $l
 			}
 		}
 	}
 	return $label
 }
 proc select_children {parent_path} {
 	global selected
 	foreach path [children $parent_path] {
 		select $path }
 }
 proc select_dependencies {path} {
 	global references
 	global labels
 	if {![info exists references($path)]} {
 		return }
 	foreach label $references($path) {
 		select $labels($label) }
 }
 proc select {path} {
 	global selected
 	if {!$selected($path)} {
 		set selected($path) 1
 		select_children $path
 		select_dependencies $path
 	}
 }
 ###############
 ## Front end ##
 ###############
 collect_labels_and_references
 foreach arg [get_cmd_arg --select {}] {
 	# distinguish DTS path argument (starting with /) from label argument
 	if {[string match "/*" $arg]} {
 		if {![info exists selected($arg)]} {
 			puts stderr "Error: path '$arg' not present in device-tree source"
 			exit -1
 		}
 		set path $arg
 	} else {
 		if {![info exists labels($arg)]} {
 			puts stderr "Error: label '$arg' not present in device-tree source"
 			exit -1
 		}
 		set path $labels($arg)
 	}
 	select $path
 }
 # select all nodes between each selected node and the root
 foreach path [array names selected] {
 	if {$selected($path)} {
 		while {[regexp {(.*)/.+$} $path dummy parent]} {
 			set selected($parent) 1
 			select_dependencies $parent
 			set path $parent
 		}
 		set selected(/) 1
 		select_dependencies /
 	}
 }
 if {[get_cmd_switch --labels]} {
 	foreach n [lsort [array names labels]] {
 		puts "$n\t$labels($n)" } }
 if {[get_cmd_switch --nodes]} {
 	foreach n [lsort [array names selected]] {
 		puts "$n" } }
 if {[get_cmd_switch --references]} {
 	foreach n [lsort [array names references]] {
 		puts "$n\t$references($n)" } }
 if {[get_cmd_switch --dot-graph]} {
 	puts "digraph {"
 	puts {  rankdir=LR; nodesep=0.01;}
 	puts {  node[shape=polygon, height=0, color=gray, fontsize=12, margin="0.1,0.01"];}
 	# declare nodes using the DTS label as node name
 	foreach l [array names labels] {
 		puts "  \"$l\"" }
 	set relations {}
 	# gather label-reference dependencies
 	foreach l [array names labels] {
 		set path $labels($l)
 		set subtree_nodes $path
 		foreach child_path [children $path] {
 			lappend subtree_nodes $child_path }
 		foreach path $subtree_nodes {
 			set label [label_of_path $path]
 			if {[info exists references($path)]} {
 				foreach ref $references($path) {
 					if {$label != $ref} {
 						lappend relations "\"$label\" -> \"$ref\"" } } }
 		}
 	}
 	# gather parent-child dependencies
 	foreach path [array names selected] {
 		set label [label_of_path $path]
 		while {[regexp {(.*)/.+$} $path dummy parent]} {
 			set parent_label [label_of_path $parent]
 			if {$parent_label != "" && $parent_label != $label} {
 				lappend relations "\"$parent_label\" -> \"$label\"" }
 			set path $parent
 		}
 	}
 	foreach relation [lsort -unique $relations] {
 		puts "  $relation" }
 	puts "}"
 }
 dump_selected_source
--- a/tool/dts/parse
+++ b/tool/dts/parse
@ -0,0 +1,261 @@
 #!/usr/bin/tclsh
 #
 # \brief  Regular-expression-based device-tree source parser
 # \author Norman Feske
 # \date   2021-05-14
 #
 # read file
 set input_pathname [lindex $argv 0]
 if {[catch {
 	#
 	# Create root node of the syntax tree
 	#
 	set txt(0) [exec cat $input_pathname]
 	set typ(0) content
 	set num 1
 }]} {
 	foreach line {
 		""
 		"Parse device-tree source file and output syntax tree."
 		""
 		"  usage: parse <source_file>"
 		""
 	} { puts stderr $line }
 	exit -1;
 }
 # do not stop parsing (this variable is only used for debugging)
 set stop 0
 #
 # Replace all '&' characters from the original input
 # because they cause trouble with the regexp command.
 #
 regsub -all {&} $txt(0) "³" txt(0)
 ##
 # Extract expression from content
 #
 # All expressions that occur in the token types 'typelist'
 # and that match the 'subexpr' criterion get replaced in
 # their original token by a reference tag and form a new
 # token of the type 'newtype'.
 #
 # The reference is coded as §<token_type><token_id>°.
 # Since the reference has the type coded in, we can
 # match sub tokens of specific types via plain regular
 # expressions.
 ##
 proc extract {newtype subexpr typelist} {
 	global num txt typ stop
 	set old_num $num
 	if {$stop} { return }
 	for {set i 0} {$i < $old_num} {incr i} {
 		if {[lsearch $typelist $typ($i)] > -1} {
 			while {[regexp $subexpr $txt($i) mid]} {
 				# new sub text #
 				set typ($num) $newtype
 				set txt($num) $mid
 				# substitute expression by a reference #
 				regsub $subexpr $txt($i)  "§$newtype$num°" txt($i)
 				incr num
 			}
 		}
 	}
 }
 #####################################################
 ## Rules for splitting the input into its elements ##
 #####################################################
 #
 # Starting with only the root token (content0) of the syntax tree
 # containing the whole source code as one string, we extract
 # typed sub tokens to partition the string into parts of distinct
 # meanings (token types).  In the process of subsequently
 # applying extraction rules to specific token types, a syntax
 # tree is formed.
 #
 # extract C-style line comments
 extract lcomment {/\*[^\n]*?\*/} content
 # extract C-style multi-line comments
 extract mlcomment {/\*.*?\*/} content
 # extract strings
 #
 # Strings may contain quoted '"' characters.
 #
 extract string {\"([^\"]|\")*?\"} content
 # extract labels and references to labels
 extract labeldef     {[\w_][\w\d_-]*:}          {content}
 extract labelname    {^[\w_][\w\d_-]*(?=(:|$))} {labeldef}
 extract reflabel     {³[\w_][\w\d_-]*}          {content}
 extract reflabelname {[\w_][\w\d_-]*$}          {reflabel}
 # fold parenthesis and blocks
 extract parenblk {\([^()]*?\)} {content parenblk}
 extract block    {\{[^{}]*?\}} {content parenblk block}
 extract cell     {\<[^<>]*?\>} {content parenblk block}
 extract openbrace  "\{" block
 extract closebrace "\}" block
 extract openparen  {\(} parenblk
 extract closeparen {\)} parenblk
 # property assignments
 extract property      {[\w_#][\w\d_\-,.+?#]*\s*=[^;]*;\n?} {block}
 extract propertyname  {^[\w_#][\w\d_\-,.+?#]*(?=\s*=)}     {property}
 extract propertyvalue {§(cell|string)\d+°}                 {property}
 # root nodes
 extract node     {\s*/\s*§block\d+°;\n?} {content}
 extract nodename {/(?=\s*§block)}        {node}
 # nodes
 extract node     {(/omit-if-no-ref/)?\s*(§labeldef\d+°\s*)?\s*[\w_][\w\d_\-,.+@]*\s*§block\d+°;\n?} {content block}
 extract nodename {[\w_][\w\d_\-,.+@]*(?=\s*§block)} {node}
 # supplemental node content provided via '&label { ... }' at the top level
 extract nodesupplement {§reflabel\d+°\s*§block\d+°;\n?} {content}
 ###############################
 ## Back-end helper functions ##
 ###############################
 ##
 # Return name of reference token with specified index
 ##
 proc token_by_idx {idx} {
 	global typ;
 	return "$typ($idx)$idx"
 }
 ##
 # Return index of specified reference token
 ##
 proc idx_of_token {token} {
 	regexp {[0-9]+} $token idx
 	return $idx
 }
 ##
 # Return type of specified reference token
 ##
 proc type_of_token {token} {
 	regexp {[a-z]+} $token type
 	return $type
 }
 ##
 # Return marker for reference token
 ##
 proc marker {token} {
 	return "§$token°"
 }
 ##
 # Return text referenced by token
 ##
 proc token_text {token} {
 	global txt
 	return $txt([idx_of_token $token])
 }
 ##
 # Assign a line number to each reference token
 #
 # To be able to provide error messages including line numbers, we
 # determine the line number for each reference token and store it
 # as an attribute.
 #
 # The result of the function is stored in the global 'ln' array.
 ##
 proc assign_line_numbers {{token content0}} {
 	global ln curr_ln
 	if {$token == "content0"} { set curr_ln 1 }
 	# assign current line number to current token
 	set ln([idx_of_token $token]) $curr_ln
 	# count occurrences of line breaks
 	if {[type_of_token $token] == "line"} { incr curr_ln }
 	if {$token == "\n"} { incr curr_ln }
 	# count lines for all sub-tokens
 	set tex [token_text $token]
 	while {$tex != ""} {
 		# count and eat raw line breaks (needed if 'whitespace' option is disabled)
 		if {[regexp {^\n} $tex dummy]} {
 			incr curr_ln
 			regsub {\n} $tex "" tex
 		}
 		# ignore plain text
 		if {[regexp {^[^§\n]+} $tex plain]} {
 			regsub {^[^§\n]+} $tex "" tex }
 		# traverse into token
 		if {[regexp {^§(.+?)°} $tex dummy token]} {
 			assign_line_numbers $token
 			regsub {§(.+?)°} $tex "" tex
 		}
 	}
 }
 ##
 # Look up line number of specified reference token
 ##
 proc line_number {token} {
 	global ln
 	return $ln([idx_of_token $token])
 }
 ##
 # Output tokens as valid Tcl List
 #
 # The result of this function can be used directly
 # as input by another Tcl script.
 ##
 proc dump_tokens { } {
 	global num typ txt
 	set tokens [list]
 	for {set i 0} {($i < $num)} {incr i} {
 		set token [token_by_idx $i]
 		set text $txt($i)
 		lappend tokens [list $token [line_number $token] $text]
 	}
 	puts $tokens
 }
 ##################
 ## Main program ##
 ##################
 assign_line_numbers
 dump_tokens