tool/dts: for extracting content from dts files

The new tool eases the inspection and pruning of device-tree source files. Fixes #4165
2025-04-06 19:07:51 +00:00 · 2021-05-19 15:59:33 +02:00 · 2021-05-19 15:59:33 +02:00 · a13dee8d19
commit a13dee8d19
parent 0069660958
3 changed files with 757 additions and 0 deletions
--- a/tool/dts/README
+++ b/tool/dts/README
@ -0,0 +1,26 @@
+
+       Utilities for extracting information from device-tree sources
+
+Device trees are prominently used in the Linux world for describing the
+structure of SoCs as for the parametrisation of device drivers. The utilities
+found in this directory ease the extraction of interesting information from
+device-tree files.
+
+The utilities found in this directory operate on device-tree source (dts)
+files as opposed to device-tree binaries (dtb) because dts files contain
+information about the inter-dependencies between nodes in textual form
+(labels) instead of mere numbers (phandles).
+
+In the Linux source tree, dts files can be found at _arch/<arch>/boot/dts/_.
+Most dts files leverage the C preprocessor to include fragments in the form of
+dtsi files. To generate the complete device-tree source information for a
+given dts file, the C preprocessor must be invoked as follows:
+
+! cpp -Iinclude -x assembler-with-cpp -P <dts-file>
+
+The _parse_ tool generates a syntax tree from a dts file. It is not meant to
+be invoked directly but it is used as a helper by other tools.
+
+The _extract_ tool can be used to query structural information from a dts
+file and to prune a device tree specifically for a subset of devices. For
+usage information, execute the tool without arguments.
--- a/tool/dts/extract
+++ b/tool/dts/extract
@ -0,0 +1,470 @@
+#!/usr/bin/tclsh
+
+#
+# \brief  Extract subtree from device-tree source
+# \author Norman Feske
+# \date   2021-05-17
+#
+
+##
+# Return true if command-line switch was specified
+#
+proc get_cmd_switch { arg_name } {
+	global argv
+	return [expr [lsearch $argv $arg_name] >= 0]
+}
+
+
+##
+# Return command-line argument value
+#
+# If a argument name is specified multiple times, a
+# list of argument values is returned.
+#
+proc get_cmd_arg { arg_name default_value } {
+	global argv
+
+	# find argument name in argv list
+	set arg_idx_list [lsearch -all $argv $arg_name]
+
+	if {[llength $arg_idx_list] == 0} { return $default_value }
+
+	set result {}
+	foreach arg_idx $arg_idx_list {
+		set next_idx [expr $arg_idx + 1]
+
+		# stop if argv ends with the argument name
+		if {$next_idx >= [llength $argv]} continue
+
+		# return list element following the argument name
+		lappend result [lindex $argv $next_idx]
+	}
+
+	# if argument occurred only once, return its value
+	if {[llength $result] == 1} { return [lindex $result 0] }
+
+	# if argument occurred multiple times, contain list of arguments
+	return $result
+}
+
+
+#################################################
+## Read input and fill internal representation ##
+#################################################
+
+##
+# Find location of 'parse'
+#
+# We expect to find 'parse' in the same directory as we are located. The
+# question is: Where are we?
+##
+proc parse_file { } {
+	global argv0
+
+	set path $argv0
+
+	if {[file type $path] == "link"} {
+		set path [file readlink $path] }
+
+	set parse_file "[file dirname $path]/parse"
+
+	if {![file exists $parse_file]} {
+		puts stderr "Error: Could not find 'parse' in '$path'."
+		exit -1
+	}
+	return $parse_file
+}
+
+
+set input_source [lindex $argv end]
+if {[catch {
+		set tokens [exec [parse_file] $input_source]
+}]} {
+	foreach line {
+	        ""
+	        "Extract subtree from device-tree source"
+	        "\n  usage: extract \[options\] <dts_file>"
+	        ""
+	        "Supported options are:"
+	        ""
+	        " --nodes           Print list of DTS nodes"
+	        " --labels          Print list of labels with their DTS paths"
+	        " --references      Print referenced labels for each DTS node"
+	        " --dot-graph       Generate dot file for graphviz"
+	        " --select <label>  Print DTS related to specified label,"
+	        "                   can be specified multiple times"
+	        ""
+	        "Alternatively to specifying a dts file as argument, the input"
+	        "data can be passed via standard input by specifying '-' as"
+	        "file name."
+	        ""
+	        "The result of the conversion will be written to stdout."
+	        ""
+	} { puts stderr $line }
+	exit -1;
+}
+
+
+foreach token $tokens {
+	set name [lindex $token 0]
+	set line [lindex $token 1]
+	set text [lindex $token 2]
+	set tok_text($name) "$text"
+	set tok_line($name) $line
+}
+
+if {![info exists tok_text(content0)]} {
+	puts stderr "Error: input contains no root token 'content0'."
+	exit -1
+}
+
+
+proc tok_type {token} {
+	regexp {[a-z]+} $token type
+	return $type
+}
+
+
+##
+# Find and return a sub token of the specified type
+#
+proc sub_token {token token_type} {
+	global tok_text
+
+	if {$token == ""} { return "" }
+	if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
+		return $sub_token
+	} else {
+		return ""
+	}
+}
+
+
+##
+# Return label references present at the specified node of the syntax tree
+#
+proc labels_referenced_by_node {token} {
+	global tok_text
+
+	set output $tok_text($token)
+
+	set result {}
+
+	while {$output != ""} {
+
+		# consume plain text
+		if {[regexp {^[^§]+} $output plain]} {
+			regsub {^[^§]+} $output "" output }
+
+		# consume token
+		if {[regexp {§(.+?)°} $output dummy subtoken]} {
+
+			# collect label reference
+			if {[tok_type $subtoken] == "reflabelname"} {
+				lappend result $tok_text($subtoken) }
+
+			# search tree for label references but don't traverse sub nodes
+			if {[tok_type $subtoken] != "node"} {
+				set result [concat $result [labels_referenced_by_node $subtoken]]
+			}
+			regsub {§(.+?)°} $output "" output
+		}
+	}
+	return [lsort -unique $result]
+}
+
+
+proc joined_path {curr_path path_elem} {
+
+	if {$curr_path == "" && $path_elem == "/"} {
+		return "/" }
+
+	if {$curr_path == "/"} {
+		return "/$path_elem" }
+
+	return "$curr_path/$path_elem"
+}
+
+
+proc collect_label_and_references_of_node {token path} {
+	global tok_text
+	global labels
+	global references
+	global selected
+
+	set node_text $tok_text($token)
+
+	foreach ref [labels_referenced_by_node $token] {
+		lappend references($path) $ref }
+
+	set selected($path) 0
+
+	if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
+		set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
+		set labels($label_name) $path
+	}
+
+	collect_labels_and_references [sub_token $token block] $path
+}
+
+
+##
+# Return path of sub node if token refers to a node or nodesupplement
+#
+# Otherwise the path is returned unmodified
+#
+proc sub_node_path {token path} {
+	global tok_text
+	global labels
+
+	if {[tok_type $token] == "node"} {
+		set path_elem $tok_text([sub_token $token nodename])
+		set path [joined_path $path $path_elem]
+	}
+
+	if {[tok_type $token] == "nodesupplement"} {
+		set label_name $tok_text([sub_token [sub_token $token reflabel] reflabelname])
+		if {[info exists labels($label_name)]} {
+			set path $labels($label_name) }
+	}
+
+	return $path
+}
+
+
+##
+# Populate the global 'labels' and 'references' arrays
+#
+# This function also initializes the 'selected' array with a key for
+# each path found and the value to 0. So the existing paths can be
+# queried by iterating over the 'selected' array names.
+#
+proc collect_labels_and_references {{token content0} {curr_path ""}} {
+	global tok_text
+
+	set output $tok_text($token)
+
+	while {$output != ""} {
+
+		# consume plain text
+		if {[regexp {^[^§]+} $output plain]} {
+			regsub {^[^§]+} $output "" output }
+
+		# consume token
+		if {[regexp {§(.+?)°} $output dummy token]} {
+
+			# try to enter node or nodesupplement
+			set path [sub_node_path $token $curr_path]
+
+			if {$path != $curr_path} {
+				collect_label_and_references_of_node $token $path }
+
+			regsub {§(.+?)°} $output "" output
+		}
+	}
+}
+
+
+proc dump_selected_source {{token content0} {curr_path ""}} {
+	global tok_text
+	global selected
+
+	set output $tok_text($token)
+
+	while {$output != ""} {
+
+		# consume plain text
+		if {[regexp {^[^§]+} $output plain]} {
+			regsub -all {³}  $plain "\\&" plain
+
+			if {[info exists selected($curr_path)] && $selected($curr_path)} {
+				puts -nonewline $plain }
+
+			regsub {^[^§]+} $output "" output
+		}
+
+		# consume token
+		if {[regexp {§(.+?)°} $output dummy token]} {
+			dump_selected_source $token [sub_node_path $token $curr_path]
+			regsub {§(.+?)°} $output "" output
+		}
+	}
+}
+
+
+proc children {parent_path} {
+	global selected
+
+	set result {}
+	foreach path [array names selected] {
+		if {[string match "$parent_path*" $path]} {
+			lappend result $path } }
+
+	return $result
+}
+
+
+##
+# Return most specific label for the given node
+#
+proc label_of_path {path} {
+	global labels
+
+	set longest_match 0
+	set label ""
+	foreach l [array names labels] {
+
+		if {$labels($l) == $path} {
+			return $l }
+
+		if {[string match "$labels($l)*" $path]} {
+			set len [string length $l]
+			if {$len > $longest_match} {
+				set longest_match $len
+				set label $l
+			}
+		}
+	}
+	return $label
+}
+
+
+proc select_children {parent_path} {
+	global selected
+
+	foreach path [children $parent_path] {
+		select $path }
+}
+
+
+proc select_dependencies {path} {
+	global references
+	global labels
+
+	if {![info exists references($path)]} {
+		return }
+
+	foreach label $references($path) {
+		select $labels($label) }
+}
+
+
+proc select {path} {
+	global selected
+
+	if {!$selected($path)} {
+		set selected($path) 1
+		select_children $path
+		select_dependencies $path
+	}
+}
+
+
+###############
+## Front end ##
+###############
+
+collect_labels_and_references
+
+
+foreach arg [get_cmd_arg --select {}] {
+
+	# distinguish DTS path argument (starting with /) from label argument
+	if {[string match "/*" $arg]} {
+
+		if {![info exists selected($arg)]} {
+			puts stderr "Error: path '$arg' not present in device-tree source"
+			exit -1
+		}
+		set path $arg
+
+	} else {
+
+		if {![info exists labels($arg)]} {
+			puts stderr "Error: label '$arg' not present in device-tree source"
+			exit -1
+		}
+		set path $labels($arg)
+	}
+
+	select $path
+}
+
+
+# select all nodes between each selected node and the root
+foreach path [array names selected] {
+	if {$selected($path)} {
+		while {[regexp {(.*)/.+$} $path dummy parent]} {
+			set selected($parent) 1
+			select_dependencies $parent
+			set path $parent
+		}
+		set selected(/) 1
+		select_dependencies /
+	}
+}
+
+
+if {[get_cmd_switch --labels]} {
+	foreach n [lsort [array names labels]] {
+		puts "$n\t$labels($n)" } }
+
+
+if {[get_cmd_switch --nodes]} {
+	foreach n [lsort [array names selected]] {
+		puts "$n" } }
+
+
+if {[get_cmd_switch --references]} {
+	foreach n [lsort [array names references]] {
+		puts "$n\t$references($n)" } }
+
+
+if {[get_cmd_switch --dot-graph]} {
+	puts "digraph {"
+	puts {  rankdir=LR; nodesep=0.01;}
+	puts {  node[shape=polygon, height=0, color=gray, fontsize=12, margin="0.1,0.01"];}
+
+	# declare nodes using the DTS label as node name
+	foreach l [array names labels] {
+		puts "  \"$l\"" }
+
+	set relations {}
+
+	# gather label-reference dependencies
+	foreach l [array names labels] {
+		set path $labels($l)
+
+		set subtree_nodes $path
+		foreach child_path [children $path] {
+			lappend subtree_nodes $child_path }
+
+		foreach path $subtree_nodes {
+			set label [label_of_path $path]
+			if {[info exists references($path)]} {
+				foreach ref $references($path) {
+					if {$label != $ref} {
+						lappend relations "\"$label\" -> \"$ref\"" } } }
+		}
+	}
+
+	# gather parent-child dependencies
+	foreach path [array names selected] {
+		set label [label_of_path $path]
+		while {[regexp {(.*)/.+$} $path dummy parent]} {
+
+			set parent_label [label_of_path $parent]
+			if {$parent_label != "" && $parent_label != $label} {
+				lappend relations "\"$parent_label\" -> \"$label\"" }
+
+			set path $parent
+		}
+	}
+
+	foreach relation [lsort -unique $relations] {
+		puts "  $relation" }
+
+	puts "}"
+}
+
+dump_selected_source
--- a/tool/dts/parse
+++ b/tool/dts/parse
@ -0,0 +1,261 @@
+#!/usr/bin/tclsh
+
+#
+# \brief  Regular-expression-based device-tree source parser
+# \author Norman Feske
+# \date   2021-05-14
+#
+
+# read file
+set input_pathname [lindex $argv 0]
+if {[catch {
+
+	#
+	# Create root node of the syntax tree
+	#
+	set txt(0) [exec cat $input_pathname]
+	set typ(0) content
+	set num 1
+}]} {
+	foreach line {
+		""
+		"Parse device-tree source file and output syntax tree."
+		""
+		"  usage: parse <source_file>"
+		""
+	} { puts stderr $line }
+
+	exit -1;
+}
+
+# do not stop parsing (this variable is only used for debugging)
+set stop 0
+
+#
+# Replace all '&' characters from the original input
+# because they cause trouble with the regexp command.
+#
+regsub -all {&} $txt(0) "³" txt(0)
+
+
+##
+# Extract expression from content
+#
+# All expressions that occur in the token types 'typelist'
+# and that match the 'subexpr' criterion get replaced in
+# their original token by a reference tag and form a new
+# token of the type 'newtype'.
+#
+# The reference is coded as §<token_type><token_id>°.
+# Since the reference has the type coded in, we can
+# match sub tokens of specific types via plain regular
+# expressions.
+##
+proc extract {newtype subexpr typelist} {
+	global num txt typ stop
+	set old_num $num
+
+	if {$stop} { return }
+
+	for {set i 0} {$i < $old_num} {incr i} {
+		if {[lsearch $typelist $typ($i)] > -1} {
+			while {[regexp $subexpr $txt($i) mid]} {
+
+				# new sub text #
+				set typ($num) $newtype
+				set txt($num) $mid
+
+				# substitute expression by a reference #
+				regsub $subexpr $txt($i)  "§$newtype$num°" txt($i)
+				incr num
+			}
+		}
+	}
+}
+
+
+#####################################################
+## Rules for splitting the input into its elements ##
+#####################################################
+
+#
+# Starting with only the root token (content0) of the syntax tree
+# containing the whole source code as one string, we extract
+# typed sub tokens to partition the string into parts of distinct
+# meanings (token types).  In the process of subsequently
+# applying extraction rules to specific token types, a syntax
+# tree is formed.
+#
+
+# extract C-style line comments
+extract lcomment {/\*[^\n]*?\*/} content
+
+# extract C-style multi-line comments
+extract mlcomment {/\*.*?\*/} content
+
+# extract strings
+#
+# Strings may contain quoted '"' characters.
+#
+extract string {\"([^\"]|\")*?\"} content
+
+# extract labels and references to labels
+extract labeldef     {[\w_][\w\d_-]*:}          {content}
+extract labelname    {^[\w_][\w\d_-]*(?=(:|$))} {labeldef}
+extract reflabel     {³[\w_][\w\d_-]*}          {content}
+extract reflabelname {[\w_][\w\d_-]*$}          {reflabel}
+
+# fold parenthesis and blocks
+extract parenblk {\([^()]*?\)} {content parenblk}
+extract block    {\{[^{}]*?\}} {content parenblk block}
+extract cell     {\<[^<>]*?\>} {content parenblk block}
+extract openbrace  "\{" block
+extract closebrace "\}" block
+extract openparen  {\(} parenblk
+extract closeparen {\)} parenblk
+
+# property assignments
+extract property      {[\w_#][\w\d_\-,.+?#]*\s*=[^;]*;\n?} {block}
+extract propertyname  {^[\w_#][\w\d_\-,.+?#]*(?=\s*=)}     {property}
+extract propertyvalue {§(cell|string)\d+°}                 {property}
+
+# root nodes
+extract node     {\s*/\s*§block\d+°;\n?} {content}
+extract nodename {/(?=\s*§block)}        {node}
+
+# nodes
+extract node     {(/omit-if-no-ref/)?\s*(§labeldef\d+°\s*)?\s*[\w_][\w\d_\-,.+@]*\s*§block\d+°;\n?} {content block}
+extract nodename {[\w_][\w\d_\-,.+@]*(?=\s*§block)} {node}
+
+# supplemental node content provided via '&label { ... }' at the top level
+extract nodesupplement {§reflabel\d+°\s*§block\d+°;\n?} {content}
+
+
+###############################
+## Back-end helper functions ##
+###############################
+
+##
+# Return name of reference token with specified index
+##
+proc token_by_idx {idx} {
+	global typ;
+	return "$typ($idx)$idx"
+}
+
+
+##
+# Return index of specified reference token
+##
+proc idx_of_token {token} {
+	regexp {[0-9]+} $token idx
+	return $idx
+}
+
+
+##
+# Return type of specified reference token
+##
+proc type_of_token {token} {
+	regexp {[a-z]+} $token type
+	return $type
+}
+
+
+##
+# Return marker for reference token
+##
+proc marker {token} {
+	return "§$token°"
+}
+
+
+##
+# Return text referenced by token
+##
+proc token_text {token} {
+	global txt
+	return $txt([idx_of_token $token])
+}
+
+
+##
+# Assign a line number to each reference token
+#
+# To be able to provide error messages including line numbers, we
+# determine the line number for each reference token and store it
+# as an attribute.
+#
+# The result of the function is stored in the global 'ln' array.
+##
+proc assign_line_numbers {{token content0}} {
+	global ln curr_ln
+
+	if {$token == "content0"} { set curr_ln 1 }
+
+	# assign current line number to current token
+	set ln([idx_of_token $token]) $curr_ln
+
+	# count occurrences of line breaks
+	if {[type_of_token $token] == "line"} { incr curr_ln }
+	if {$token == "\n"} { incr curr_ln }
+
+	# count lines for all sub-tokens
+	set tex [token_text $token]
+	while {$tex != ""} {
+
+		# count and eat raw line breaks (needed if 'whitespace' option is disabled)
+		if {[regexp {^\n} $tex dummy]} {
+			incr curr_ln
+			regsub {\n} $tex "" tex
+		}
+
+		# ignore plain text
+		if {[regexp {^[^§\n]+} $tex plain]} {
+			regsub {^[^§\n]+} $tex "" tex }
+
+		# traverse into token
+		if {[regexp {^§(.+?)°} $tex dummy token]} {
+			assign_line_numbers $token
+			regsub {§(.+?)°} $tex "" tex
+		}
+	}
+}
+
+
+##
+# Look up line number of specified reference token
+##
+proc line_number {token} {
+	global ln
+	return $ln([idx_of_token $token])
+}
+
+
+##
+# Output tokens as valid Tcl List
+#
+# The result of this function can be used directly
+# as input by another Tcl script.
+##
+proc dump_tokens { } {
+	global num typ txt
+	set tokens [list]
+
+	for {set i 0} {($i < $num)} {incr i} {
+		set token [token_by_idx $i]
+		set text $txt($i)
+
+		lappend tokens [list $token [line_number $token] $text]
+	}
+	puts $tokens
+}
+
+
+##################
+## Main program ##
+##################
+
+assign_line_numbers
+
+dump_tokens