tool/dts: for extracting content from dts files

The new tool eases the inspection and pruning of device-tree source
files.

Fixes #4165
This commit is contained in:
Norman Feske 2021-05-19 15:59:33 +02:00 committed by Christian Helmuth
parent 0069660958
commit a13dee8d19
3 changed files with 757 additions and 0 deletions

26
tool/dts/README Normal file
View File

@ -0,0 +1,26 @@
Utilities for extracting information from device-tree sources
Device trees are prominently used in the Linux world for describing the
structure of SoCs as for the parametrisation of device drivers. The utilities
found in this directory ease the extraction of interesting information from
device-tree files.
The utilities found in this directory operate on device-tree source (dts)
files as opposed to device-tree binaries (dtb) because dts files contain
information about the inter-dependencies between nodes in textual form
(labels) instead of mere numbers (phandles).
In the Linux source tree, dts files can be found at _arch/<arch>/boot/dts/_.
Most dts files leverage the C preprocessor to include fragments in the form of
dtsi files. To generate the complete device-tree source information for a
given dts file, the C preprocessor must be invoked as follows:
! cpp -Iinclude -x assembler-with-cpp -P <dts-file>
The _parse_ tool generates a syntax tree from a dts file. It is not meant to
be invoked directly but it is used as a helper by other tools.
The _extract_ tool can be used to query structural information from a dts
file and to prune a device tree specifically for a subset of devices. For
usage information, execute the tool without arguments.

470
tool/dts/extract Executable file
View File

@ -0,0 +1,470 @@
#!/usr/bin/tclsh
#
# \brief Extract subtree from device-tree source
# \author Norman Feske
# \date 2021-05-17
#
##
# Return true if command-line switch was specified
#
proc get_cmd_switch { arg_name } {
global argv
return [expr [lsearch $argv $arg_name] >= 0]
}
##
# Return command-line argument value
#
# If a argument name is specified multiple times, a
# list of argument values is returned.
#
proc get_cmd_arg { arg_name default_value } {
global argv
# find argument name in argv list
set arg_idx_list [lsearch -all $argv $arg_name]
if {[llength $arg_idx_list] == 0} { return $default_value }
set result {}
foreach arg_idx $arg_idx_list {
set next_idx [expr $arg_idx + 1]
# stop if argv ends with the argument name
if {$next_idx >= [llength $argv]} continue
# return list element following the argument name
lappend result [lindex $argv $next_idx]
}
# if argument occurred only once, return its value
if {[llength $result] == 1} { return [lindex $result 0] }
# if argument occurred multiple times, contain list of arguments
return $result
}
#################################################
## Read input and fill internal representation ##
#################################################
##
# Find location of 'parse'
#
# We expect to find 'parse' in the same directory as we are located. The
# question is: Where are we?
##
proc parse_file { } {
global argv0
set path $argv0
if {[file type $path] == "link"} {
set path [file readlink $path] }
set parse_file "[file dirname $path]/parse"
if {![file exists $parse_file]} {
puts stderr "Error: Could not find 'parse' in '$path'."
exit -1
}
return $parse_file
}
set input_source [lindex $argv end]
if {[catch {
set tokens [exec [parse_file] $input_source]
}]} {
foreach line {
""
"Extract subtree from device-tree source"
"\n usage: extract \[options\] <dts_file>"
""
"Supported options are:"
""
" --nodes Print list of DTS nodes"
" --labels Print list of labels with their DTS paths"
" --references Print referenced labels for each DTS node"
" --dot-graph Generate dot file for graphviz"
" --select <label> Print DTS related to specified label,"
" can be specified multiple times"
""
"Alternatively to specifying a dts file as argument, the input"
"data can be passed via standard input by specifying '-' as"
"file name."
""
"The result of the conversion will be written to stdout."
""
} { puts stderr $line }
exit -1;
}
foreach token $tokens {
set name [lindex $token 0]
set line [lindex $token 1]
set text [lindex $token 2]
set tok_text($name) "$text"
set tok_line($name) $line
}
if {![info exists tok_text(content0)]} {
puts stderr "Error: input contains no root token 'content0'."
exit -1
}
proc tok_type {token} {
regexp {[a-z]+} $token type
return $type
}
##
# Find and return a sub token of the specified type
#
proc sub_token {token token_type} {
global tok_text
if {$token == ""} { return "" }
if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
return $sub_token
} else {
return ""
}
}
##
# Return label references present at the specified node of the syntax tree
#
proc labels_referenced_by_node {token} {
global tok_text
set output $tok_text($token)
set result {}
while {$output != ""} {
# consume plain text
if {[regexp {^[^§]+} $output plain]} {
regsub {^[^§]+} $output "" output }
# consume token
if {[regexp {§(.+?)°} $output dummy subtoken]} {
# collect label reference
if {[tok_type $subtoken] == "reflabelname"} {
lappend result $tok_text($subtoken) }
# search tree for label references but don't traverse sub nodes
if {[tok_type $subtoken] != "node"} {
set result [concat $result [labels_referenced_by_node $subtoken]]
}
regsub {§(.+?)°} $output "" output
}
}
return [lsort -unique $result]
}
proc joined_path {curr_path path_elem} {
if {$curr_path == "" && $path_elem == "/"} {
return "/" }
if {$curr_path == "/"} {
return "/$path_elem" }
return "$curr_path/$path_elem"
}
proc collect_label_and_references_of_node {token path} {
global tok_text
global labels
global references
global selected
set node_text $tok_text($token)
foreach ref [labels_referenced_by_node $token] {
lappend references($path) $ref }
set selected($path) 0
if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
set labels($label_name) $path
}
collect_labels_and_references [sub_token $token block] $path
}
##
# Return path of sub node if token refers to a node or nodesupplement
#
# Otherwise the path is returned unmodified
#
proc sub_node_path {token path} {
global tok_text
global labels
if {[tok_type $token] == "node"} {
set path_elem $tok_text([sub_token $token nodename])
set path [joined_path $path $path_elem]
}
if {[tok_type $token] == "nodesupplement"} {
set label_name $tok_text([sub_token [sub_token $token reflabel] reflabelname])
if {[info exists labels($label_name)]} {
set path $labels($label_name) }
}
return $path
}
##
# Populate the global 'labels' and 'references' arrays
#
# This function also initializes the 'selected' array with a key for
# each path found and the value to 0. So the existing paths can be
# queried by iterating over the 'selected' array names.
#
proc collect_labels_and_references {{token content0} {curr_path ""}} {
global tok_text
set output $tok_text($token)
while {$output != ""} {
# consume plain text
if {[regexp {^[^§]+} $output plain]} {
regsub {^[^§]+} $output "" output }
# consume token
if {[regexp {§(.+?)°} $output dummy token]} {
# try to enter node or nodesupplement
set path [sub_node_path $token $curr_path]
if {$path != $curr_path} {
collect_label_and_references_of_node $token $path }
regsub {§(.+?)°} $output "" output
}
}
}
proc dump_selected_source {{token content0} {curr_path ""}} {
global tok_text
global selected
set output $tok_text($token)
while {$output != ""} {
# consume plain text
if {[regexp {^[^§]+} $output plain]} {
regsub -all {³} $plain "\\&" plain
if {[info exists selected($curr_path)] && $selected($curr_path)} {
puts -nonewline $plain }
regsub {^[^§]+} $output "" output
}
# consume token
if {[regexp {§(.+?)°} $output dummy token]} {
dump_selected_source $token [sub_node_path $token $curr_path]
regsub {§(.+?)°} $output "" output
}
}
}
proc children {parent_path} {
global selected
set result {}
foreach path [array names selected] {
if {[string match "$parent_path*" $path]} {
lappend result $path } }
return $result
}
##
# Return most specific label for the given node
#
proc label_of_path {path} {
global labels
set longest_match 0
set label ""
foreach l [array names labels] {
if {$labels($l) == $path} {
return $l }
if {[string match "$labels($l)*" $path]} {
set len [string length $l]
if {$len > $longest_match} {
set longest_match $len
set label $l
}
}
}
return $label
}
proc select_children {parent_path} {
global selected
foreach path [children $parent_path] {
select $path }
}
proc select_dependencies {path} {
global references
global labels
if {![info exists references($path)]} {
return }
foreach label $references($path) {
select $labels($label) }
}
proc select {path} {
global selected
if {!$selected($path)} {
set selected($path) 1
select_children $path
select_dependencies $path
}
}
###############
## Front end ##
###############
collect_labels_and_references
foreach arg [get_cmd_arg --select {}] {
# distinguish DTS path argument (starting with /) from label argument
if {[string match "/*" $arg]} {
if {![info exists selected($arg)]} {
puts stderr "Error: path '$arg' not present in device-tree source"
exit -1
}
set path $arg
} else {
if {![info exists labels($arg)]} {
puts stderr "Error: label '$arg' not present in device-tree source"
exit -1
}
set path $labels($arg)
}
select $path
}
# select all nodes between each selected node and the root
foreach path [array names selected] {
if {$selected($path)} {
while {[regexp {(.*)/.+$} $path dummy parent]} {
set selected($parent) 1
select_dependencies $parent
set path $parent
}
set selected(/) 1
select_dependencies /
}
}
if {[get_cmd_switch --labels]} {
foreach n [lsort [array names labels]] {
puts "$n\t$labels($n)" } }
if {[get_cmd_switch --nodes]} {
foreach n [lsort [array names selected]] {
puts "$n" } }
if {[get_cmd_switch --references]} {
foreach n [lsort [array names references]] {
puts "$n\t$references($n)" } }
if {[get_cmd_switch --dot-graph]} {
puts "digraph {"
puts { rankdir=LR; nodesep=0.01;}
puts { node[shape=polygon, height=0, color=gray, fontsize=12, margin="0.1,0.01"];}
# declare nodes using the DTS label as node name
foreach l [array names labels] {
puts " \"$l\"" }
set relations {}
# gather label-reference dependencies
foreach l [array names labels] {
set path $labels($l)
set subtree_nodes $path
foreach child_path [children $path] {
lappend subtree_nodes $child_path }
foreach path $subtree_nodes {
set label [label_of_path $path]
if {[info exists references($path)]} {
foreach ref $references($path) {
if {$label != $ref} {
lappend relations "\"$label\" -> \"$ref\"" } } }
}
}
# gather parent-child dependencies
foreach path [array names selected] {
set label [label_of_path $path]
while {[regexp {(.*)/.+$} $path dummy parent]} {
set parent_label [label_of_path $parent]
if {$parent_label != "" && $parent_label != $label} {
lappend relations "\"$parent_label\" -> \"$label\"" }
set path $parent
}
}
foreach relation [lsort -unique $relations] {
puts " $relation" }
puts "}"
}
dump_selected_source

261
tool/dts/parse Executable file
View File

@ -0,0 +1,261 @@
#!/usr/bin/tclsh
#
# \brief Regular-expression-based device-tree source parser
# \author Norman Feske
# \date 2021-05-14
#
# read file
set input_pathname [lindex $argv 0]
if {[catch {
#
# Create root node of the syntax tree
#
set txt(0) [exec cat $input_pathname]
set typ(0) content
set num 1
}]} {
foreach line {
""
"Parse device-tree source file and output syntax tree."
""
" usage: parse <source_file>"
""
} { puts stderr $line }
exit -1;
}
# do not stop parsing (this variable is only used for debugging)
set stop 0
#
# Replace all '&' characters from the original input
# because they cause trouble with the regexp command.
#
regsub -all {&} $txt(0) "³" txt(0)
##
# Extract expression from content
#
# All expressions that occur in the token types 'typelist'
# and that match the 'subexpr' criterion get replaced in
# their original token by a reference tag and form a new
# token of the type 'newtype'.
#
# The reference is coded as §<token_type><token_id>°.
# Since the reference has the type coded in, we can
# match sub tokens of specific types via plain regular
# expressions.
##
proc extract {newtype subexpr typelist} {
global num txt typ stop
set old_num $num
if {$stop} { return }
for {set i 0} {$i < $old_num} {incr i} {
if {[lsearch $typelist $typ($i)] > -1} {
while {[regexp $subexpr $txt($i) mid]} {
# new sub text #
set typ($num) $newtype
set txt($num) $mid
# substitute expression by a reference #
regsub $subexpr $txt($i) "§$newtype$num°" txt($i)
incr num
}
}
}
}
#####################################################
## Rules for splitting the input into its elements ##
#####################################################
#
# Starting with only the root token (content0) of the syntax tree
# containing the whole source code as one string, we extract
# typed sub tokens to partition the string into parts of distinct
# meanings (token types). In the process of subsequently
# applying extraction rules to specific token types, a syntax
# tree is formed.
#
# extract C-style line comments
extract lcomment {/\*[^\n]*?\*/} content
# extract C-style multi-line comments
extract mlcomment {/\*.*?\*/} content
# extract strings
#
# Strings may contain quoted '"' characters.
#
extract string {\"([^\"]|\")*?\"} content
# extract labels and references to labels
extract labeldef {[\w_][\w\d_-]*:} {content}
extract labelname {^[\w_][\w\d_-]*(?=(:|$))} {labeldef}
extract reflabel {³[\w_][\w\d_-]*} {content}
extract reflabelname {[\w_][\w\d_-]*$} {reflabel}
# fold parenthesis and blocks
extract parenblk {\([^()]*?\)} {content parenblk}
extract block {\{[^{}]*?\}} {content parenblk block}
extract cell {\<[^<>]*?\>} {content parenblk block}
extract openbrace "\{" block
extract closebrace "\}" block
extract openparen {\(} parenblk
extract closeparen {\)} parenblk
# property assignments
extract property {[\w_#][\w\d_\-,.+?#]*\s*=[^;]*;\n?} {block}
extract propertyname {^[\w_#][\w\d_\-,.+?#]*(?=\s*=)} {property}
extract propertyvalue {§(cell|string)\d+°} {property}
# root nodes
extract node {\s*/\s*§block\d+°;\n?} {content}
extract nodename {/(?=\s*§block)} {node}
# nodes
extract node {(/omit-if-no-ref/)?\s*(§labeldef\d+°\s*)?\s*[\w_][\w\d_\-,.+@]*\s*§block\d+°;\n?} {content block}
extract nodename {[\w_][\w\d_\-,.+@]*(?=\s*§block)} {node}
# supplemental node content provided via '&label { ... }' at the top level
extract nodesupplement {§reflabel\d+°\s*§block\d+°;\n?} {content}
###############################
## Back-end helper functions ##
###############################
##
# Return name of reference token with specified index
##
proc token_by_idx {idx} {
global typ;
return "$typ($idx)$idx"
}
##
# Return index of specified reference token
##
proc idx_of_token {token} {
regexp {[0-9]+} $token idx
return $idx
}
##
# Return type of specified reference token
##
proc type_of_token {token} {
regexp {[a-z]+} $token type
return $type
}
##
# Return marker for reference token
##
proc marker {token} {
return "§$token°"
}
##
# Return text referenced by token
##
proc token_text {token} {
global txt
return $txt([idx_of_token $token])
}
##
# Assign a line number to each reference token
#
# To be able to provide error messages including line numbers, we
# determine the line number for each reference token and store it
# as an attribute.
#
# The result of the function is stored in the global 'ln' array.
##
proc assign_line_numbers {{token content0}} {
global ln curr_ln
if {$token == "content0"} { set curr_ln 1 }
# assign current line number to current token
set ln([idx_of_token $token]) $curr_ln
# count occurrences of line breaks
if {[type_of_token $token] == "line"} { incr curr_ln }
if {$token == "\n"} { incr curr_ln }
# count lines for all sub-tokens
set tex [token_text $token]
while {$tex != ""} {
# count and eat raw line breaks (needed if 'whitespace' option is disabled)
if {[regexp {^\n} $tex dummy]} {
incr curr_ln
regsub {\n} $tex "" tex
}
# ignore plain text
if {[regexp {^[^§\n]+} $tex plain]} {
regsub {^[^§\n]+} $tex "" tex }
# traverse into token
if {[regexp {^§(.+?)°} $tex dummy token]} {
assign_line_numbers $token
regsub {§(.+?)°} $tex "" tex
}
}
}
##
# Look up line number of specified reference token
##
proc line_number {token} {
global ln
return $ln([idx_of_token $token])
}
##
# Output tokens as valid Tcl List
#
# The result of this function can be used directly
# as input by another Tcl script.
##
proc dump_tokens { } {
global num typ txt
set tokens [list]
for {set i 0} {($i < $num)} {incr i} {
set token [token_by_idx $i]
set text $txt($i)
lappend tokens [list $token [line_number $token] $text]
}
puts $tokens
}
##################
## Main program ##
##################
assign_line_numbers
dump_tokens