mirror of
https://github.com/genodelabs/genode.git
synced 2025-01-30 08:03:59 +00:00
tool/dts: for extracting content from dts files
The new tool eases the inspection and pruning of device-tree source files. Fixes #4165
This commit is contained in:
parent
0069660958
commit
a13dee8d19
26
tool/dts/README
Normal file
26
tool/dts/README
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
Utilities for extracting information from device-tree sources
|
||||||
|
|
||||||
|
Device trees are prominently used in the Linux world for describing the
|
||||||
|
structure of SoCs as for the parametrisation of device drivers. The utilities
|
||||||
|
found in this directory ease the extraction of interesting information from
|
||||||
|
device-tree files.
|
||||||
|
|
||||||
|
The utilities found in this directory operate on device-tree source (dts)
|
||||||
|
files as opposed to device-tree binaries (dtb) because dts files contain
|
||||||
|
information about the inter-dependencies between nodes in textual form
|
||||||
|
(labels) instead of mere numbers (phandles).
|
||||||
|
|
||||||
|
In the Linux source tree, dts files can be found at _arch/<arch>/boot/dts/_.
|
||||||
|
Most dts files leverage the C preprocessor to include fragments in the form of
|
||||||
|
dtsi files. To generate the complete device-tree source information for a
|
||||||
|
given dts file, the C preprocessor must be invoked as follows:
|
||||||
|
|
||||||
|
! cpp -Iinclude -x assembler-with-cpp -P <dts-file>
|
||||||
|
|
||||||
|
The _parse_ tool generates a syntax tree from a dts file. It is not meant to
|
||||||
|
be invoked directly but it is used as a helper by other tools.
|
||||||
|
|
||||||
|
The _extract_ tool can be used to query structural information from a dts
|
||||||
|
file and to prune a device tree specifically for a subset of devices. For
|
||||||
|
usage information, execute the tool without arguments.
|
470
tool/dts/extract
Executable file
470
tool/dts/extract
Executable file
@ -0,0 +1,470 @@
|
|||||||
|
#!/usr/bin/tclsh
|
||||||
|
|
||||||
|
#
|
||||||
|
# \brief Extract subtree from device-tree source
|
||||||
|
# \author Norman Feske
|
||||||
|
# \date 2021-05-17
|
||||||
|
#
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return true if command-line switch was specified
|
||||||
|
#
|
||||||
|
proc get_cmd_switch { arg_name } {
|
||||||
|
global argv
|
||||||
|
return [expr [lsearch $argv $arg_name] >= 0]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return command-line argument value
|
||||||
|
#
|
||||||
|
# If a argument name is specified multiple times, a
|
||||||
|
# list of argument values is returned.
|
||||||
|
#
|
||||||
|
proc get_cmd_arg { arg_name default_value } {
|
||||||
|
global argv
|
||||||
|
|
||||||
|
# find argument name in argv list
|
||||||
|
set arg_idx_list [lsearch -all $argv $arg_name]
|
||||||
|
|
||||||
|
if {[llength $arg_idx_list] == 0} { return $default_value }
|
||||||
|
|
||||||
|
set result {}
|
||||||
|
foreach arg_idx $arg_idx_list {
|
||||||
|
set next_idx [expr $arg_idx + 1]
|
||||||
|
|
||||||
|
# stop if argv ends with the argument name
|
||||||
|
if {$next_idx >= [llength $argv]} continue
|
||||||
|
|
||||||
|
# return list element following the argument name
|
||||||
|
lappend result [lindex $argv $next_idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
# if argument occurred only once, return its value
|
||||||
|
if {[llength $result] == 1} { return [lindex $result 0] }
|
||||||
|
|
||||||
|
# if argument occurred multiple times, contain list of arguments
|
||||||
|
return $result
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
## Read input and fill internal representation ##
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
##
|
||||||
|
# Find location of 'parse'
|
||||||
|
#
|
||||||
|
# We expect to find 'parse' in the same directory as we are located. The
|
||||||
|
# question is: Where are we?
|
||||||
|
##
|
||||||
|
proc parse_file { } {
|
||||||
|
global argv0
|
||||||
|
|
||||||
|
set path $argv0
|
||||||
|
|
||||||
|
if {[file type $path] == "link"} {
|
||||||
|
set path [file readlink $path] }
|
||||||
|
|
||||||
|
set parse_file "[file dirname $path]/parse"
|
||||||
|
|
||||||
|
if {![file exists $parse_file]} {
|
||||||
|
puts stderr "Error: Could not find 'parse' in '$path'."
|
||||||
|
exit -1
|
||||||
|
}
|
||||||
|
return $parse_file
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
set input_source [lindex $argv end]
|
||||||
|
if {[catch {
|
||||||
|
set tokens [exec [parse_file] $input_source]
|
||||||
|
}]} {
|
||||||
|
foreach line {
|
||||||
|
""
|
||||||
|
"Extract subtree from device-tree source"
|
||||||
|
"\n usage: extract \[options\] <dts_file>"
|
||||||
|
""
|
||||||
|
"Supported options are:"
|
||||||
|
""
|
||||||
|
" --nodes Print list of DTS nodes"
|
||||||
|
" --labels Print list of labels with their DTS paths"
|
||||||
|
" --references Print referenced labels for each DTS node"
|
||||||
|
" --dot-graph Generate dot file for graphviz"
|
||||||
|
" --select <label> Print DTS related to specified label,"
|
||||||
|
" can be specified multiple times"
|
||||||
|
""
|
||||||
|
"Alternatively to specifying a dts file as argument, the input"
|
||||||
|
"data can be passed via standard input by specifying '-' as"
|
||||||
|
"file name."
|
||||||
|
""
|
||||||
|
"The result of the conversion will be written to stdout."
|
||||||
|
""
|
||||||
|
} { puts stderr $line }
|
||||||
|
exit -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
foreach token $tokens {
|
||||||
|
set name [lindex $token 0]
|
||||||
|
set line [lindex $token 1]
|
||||||
|
set text [lindex $token 2]
|
||||||
|
set tok_text($name) "$text"
|
||||||
|
set tok_line($name) $line
|
||||||
|
}
|
||||||
|
|
||||||
|
if {![info exists tok_text(content0)]} {
|
||||||
|
puts stderr "Error: input contains no root token 'content0'."
|
||||||
|
exit -1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc tok_type {token} {
|
||||||
|
regexp {[a-z]+} $token type
|
||||||
|
return $type
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Find and return a sub token of the specified type
|
||||||
|
#
|
||||||
|
proc sub_token {token token_type} {
|
||||||
|
global tok_text
|
||||||
|
|
||||||
|
if {$token == ""} { return "" }
|
||||||
|
if {[regexp "§($token_type\\d+)°" $tok_text($token) dummy sub_token]} {
|
||||||
|
return $sub_token
|
||||||
|
} else {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return label references present at the specified node of the syntax tree
|
||||||
|
#
|
||||||
|
proc labels_referenced_by_node {token} {
|
||||||
|
global tok_text
|
||||||
|
|
||||||
|
set output $tok_text($token)
|
||||||
|
|
||||||
|
set result {}
|
||||||
|
|
||||||
|
while {$output != ""} {
|
||||||
|
|
||||||
|
# consume plain text
|
||||||
|
if {[regexp {^[^§]+} $output plain]} {
|
||||||
|
regsub {^[^§]+} $output "" output }
|
||||||
|
|
||||||
|
# consume token
|
||||||
|
if {[regexp {§(.+?)°} $output dummy subtoken]} {
|
||||||
|
|
||||||
|
# collect label reference
|
||||||
|
if {[tok_type $subtoken] == "reflabelname"} {
|
||||||
|
lappend result $tok_text($subtoken) }
|
||||||
|
|
||||||
|
# search tree for label references but don't traverse sub nodes
|
||||||
|
if {[tok_type $subtoken] != "node"} {
|
||||||
|
set result [concat $result [labels_referenced_by_node $subtoken]]
|
||||||
|
}
|
||||||
|
regsub {§(.+?)°} $output "" output
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return [lsort -unique $result]
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc joined_path {curr_path path_elem} {
|
||||||
|
|
||||||
|
if {$curr_path == "" && $path_elem == "/"} {
|
||||||
|
return "/" }
|
||||||
|
|
||||||
|
if {$curr_path == "/"} {
|
||||||
|
return "/$path_elem" }
|
||||||
|
|
||||||
|
return "$curr_path/$path_elem"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc collect_label_and_references_of_node {token path} {
|
||||||
|
global tok_text
|
||||||
|
global labels
|
||||||
|
global references
|
||||||
|
global selected
|
||||||
|
|
||||||
|
set node_text $tok_text($token)
|
||||||
|
|
||||||
|
foreach ref [labels_referenced_by_node $token] {
|
||||||
|
lappend references($path) $ref }
|
||||||
|
|
||||||
|
set selected($path) 0
|
||||||
|
|
||||||
|
if {[regexp {§(labeldef\d+)°} $node_text dummy]} {
|
||||||
|
set label_name $tok_text([sub_token [sub_token $token labeldef] labelname])
|
||||||
|
set labels($label_name) $path
|
||||||
|
}
|
||||||
|
|
||||||
|
collect_labels_and_references [sub_token $token block] $path
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return path of sub node if token refers to a node or nodesupplement
|
||||||
|
#
|
||||||
|
# Otherwise the path is returned unmodified
|
||||||
|
#
|
||||||
|
proc sub_node_path {token path} {
|
||||||
|
global tok_text
|
||||||
|
global labels
|
||||||
|
|
||||||
|
if {[tok_type $token] == "node"} {
|
||||||
|
set path_elem $tok_text([sub_token $token nodename])
|
||||||
|
set path [joined_path $path $path_elem]
|
||||||
|
}
|
||||||
|
|
||||||
|
if {[tok_type $token] == "nodesupplement"} {
|
||||||
|
set label_name $tok_text([sub_token [sub_token $token reflabel] reflabelname])
|
||||||
|
if {[info exists labels($label_name)]} {
|
||||||
|
set path $labels($label_name) }
|
||||||
|
}
|
||||||
|
|
||||||
|
return $path
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Populate the global 'labels' and 'references' arrays
|
||||||
|
#
|
||||||
|
# This function also initializes the 'selected' array with a key for
|
||||||
|
# each path found and the value to 0. So the existing paths can be
|
||||||
|
# queried by iterating over the 'selected' array names.
|
||||||
|
#
|
||||||
|
proc collect_labels_and_references {{token content0} {curr_path ""}} {
|
||||||
|
global tok_text
|
||||||
|
|
||||||
|
set output $tok_text($token)
|
||||||
|
|
||||||
|
while {$output != ""} {
|
||||||
|
|
||||||
|
# consume plain text
|
||||||
|
if {[regexp {^[^§]+} $output plain]} {
|
||||||
|
regsub {^[^§]+} $output "" output }
|
||||||
|
|
||||||
|
# consume token
|
||||||
|
if {[regexp {§(.+?)°} $output dummy token]} {
|
||||||
|
|
||||||
|
# try to enter node or nodesupplement
|
||||||
|
set path [sub_node_path $token $curr_path]
|
||||||
|
|
||||||
|
if {$path != $curr_path} {
|
||||||
|
collect_label_and_references_of_node $token $path }
|
||||||
|
|
||||||
|
regsub {§(.+?)°} $output "" output
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc dump_selected_source {{token content0} {curr_path ""}} {
|
||||||
|
global tok_text
|
||||||
|
global selected
|
||||||
|
|
||||||
|
set output $tok_text($token)
|
||||||
|
|
||||||
|
while {$output != ""} {
|
||||||
|
|
||||||
|
# consume plain text
|
||||||
|
if {[regexp {^[^§]+} $output plain]} {
|
||||||
|
regsub -all {³} $plain "\\&" plain
|
||||||
|
|
||||||
|
if {[info exists selected($curr_path)] && $selected($curr_path)} {
|
||||||
|
puts -nonewline $plain }
|
||||||
|
|
||||||
|
regsub {^[^§]+} $output "" output
|
||||||
|
}
|
||||||
|
|
||||||
|
# consume token
|
||||||
|
if {[regexp {§(.+?)°} $output dummy token]} {
|
||||||
|
dump_selected_source $token [sub_node_path $token $curr_path]
|
||||||
|
regsub {§(.+?)°} $output "" output
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc children {parent_path} {
|
||||||
|
global selected
|
||||||
|
|
||||||
|
set result {}
|
||||||
|
foreach path [array names selected] {
|
||||||
|
if {[string match "$parent_path*" $path]} {
|
||||||
|
lappend result $path } }
|
||||||
|
|
||||||
|
return $result
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return most specific label for the given node
|
||||||
|
#
|
||||||
|
proc label_of_path {path} {
|
||||||
|
global labels
|
||||||
|
|
||||||
|
set longest_match 0
|
||||||
|
set label ""
|
||||||
|
foreach l [array names labels] {
|
||||||
|
|
||||||
|
if {$labels($l) == $path} {
|
||||||
|
return $l }
|
||||||
|
|
||||||
|
if {[string match "$labels($l)*" $path]} {
|
||||||
|
set len [string length $l]
|
||||||
|
if {$len > $longest_match} {
|
||||||
|
set longest_match $len
|
||||||
|
set label $l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $label
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc select_children {parent_path} {
|
||||||
|
global selected
|
||||||
|
|
||||||
|
foreach path [children $parent_path] {
|
||||||
|
select $path }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc select_dependencies {path} {
|
||||||
|
global references
|
||||||
|
global labels
|
||||||
|
|
||||||
|
if {![info exists references($path)]} {
|
||||||
|
return }
|
||||||
|
|
||||||
|
foreach label $references($path) {
|
||||||
|
select $labels($label) }
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
proc select {path} {
|
||||||
|
global selected
|
||||||
|
|
||||||
|
if {!$selected($path)} {
|
||||||
|
set selected($path) 1
|
||||||
|
select_children $path
|
||||||
|
select_dependencies $path
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
###############
|
||||||
|
## Front end ##
|
||||||
|
###############
|
||||||
|
|
||||||
|
collect_labels_and_references
|
||||||
|
|
||||||
|
|
||||||
|
foreach arg [get_cmd_arg --select {}] {
|
||||||
|
|
||||||
|
# distinguish DTS path argument (starting with /) from label argument
|
||||||
|
if {[string match "/*" $arg]} {
|
||||||
|
|
||||||
|
if {![info exists selected($arg)]} {
|
||||||
|
puts stderr "Error: path '$arg' not present in device-tree source"
|
||||||
|
exit -1
|
||||||
|
}
|
||||||
|
set path $arg
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
if {![info exists labels($arg)]} {
|
||||||
|
puts stderr "Error: label '$arg' not present in device-tree source"
|
||||||
|
exit -1
|
||||||
|
}
|
||||||
|
set path $labels($arg)
|
||||||
|
}
|
||||||
|
|
||||||
|
select $path
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# select all nodes between each selected node and the root
|
||||||
|
foreach path [array names selected] {
|
||||||
|
if {$selected($path)} {
|
||||||
|
while {[regexp {(.*)/.+$} $path dummy parent]} {
|
||||||
|
set selected($parent) 1
|
||||||
|
select_dependencies $parent
|
||||||
|
set path $parent
|
||||||
|
}
|
||||||
|
set selected(/) 1
|
||||||
|
select_dependencies /
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if {[get_cmd_switch --labels]} {
|
||||||
|
foreach n [lsort [array names labels]] {
|
||||||
|
puts "$n\t$labels($n)" } }
|
||||||
|
|
||||||
|
|
||||||
|
if {[get_cmd_switch --nodes]} {
|
||||||
|
foreach n [lsort [array names selected]] {
|
||||||
|
puts "$n" } }
|
||||||
|
|
||||||
|
|
||||||
|
if {[get_cmd_switch --references]} {
|
||||||
|
foreach n [lsort [array names references]] {
|
||||||
|
puts "$n\t$references($n)" } }
|
||||||
|
|
||||||
|
|
||||||
|
if {[get_cmd_switch --dot-graph]} {
|
||||||
|
puts "digraph {"
|
||||||
|
puts { rankdir=LR; nodesep=0.01;}
|
||||||
|
puts { node[shape=polygon, height=0, color=gray, fontsize=12, margin="0.1,0.01"];}
|
||||||
|
|
||||||
|
# declare nodes using the DTS label as node name
|
||||||
|
foreach l [array names labels] {
|
||||||
|
puts " \"$l\"" }
|
||||||
|
|
||||||
|
set relations {}
|
||||||
|
|
||||||
|
# gather label-reference dependencies
|
||||||
|
foreach l [array names labels] {
|
||||||
|
set path $labels($l)
|
||||||
|
|
||||||
|
set subtree_nodes $path
|
||||||
|
foreach child_path [children $path] {
|
||||||
|
lappend subtree_nodes $child_path }
|
||||||
|
|
||||||
|
foreach path $subtree_nodes {
|
||||||
|
set label [label_of_path $path]
|
||||||
|
if {[info exists references($path)]} {
|
||||||
|
foreach ref $references($path) {
|
||||||
|
if {$label != $ref} {
|
||||||
|
lappend relations "\"$label\" -> \"$ref\"" } } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# gather parent-child dependencies
|
||||||
|
foreach path [array names selected] {
|
||||||
|
set label [label_of_path $path]
|
||||||
|
while {[regexp {(.*)/.+$} $path dummy parent]} {
|
||||||
|
|
||||||
|
set parent_label [label_of_path $parent]
|
||||||
|
if {$parent_label != "" && $parent_label != $label} {
|
||||||
|
lappend relations "\"$parent_label\" -> \"$label\"" }
|
||||||
|
|
||||||
|
set path $parent
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach relation [lsort -unique $relations] {
|
||||||
|
puts " $relation" }
|
||||||
|
|
||||||
|
puts "}"
|
||||||
|
}
|
||||||
|
|
||||||
|
dump_selected_source
|
261
tool/dts/parse
Executable file
261
tool/dts/parse
Executable file
@ -0,0 +1,261 @@
|
|||||||
|
#!/usr/bin/tclsh
|
||||||
|
|
||||||
|
#
|
||||||
|
# \brief Regular-expression-based device-tree source parser
|
||||||
|
# \author Norman Feske
|
||||||
|
# \date 2021-05-14
|
||||||
|
#
|
||||||
|
|
||||||
|
# read file
|
||||||
|
set input_pathname [lindex $argv 0]
|
||||||
|
if {[catch {
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create root node of the syntax tree
|
||||||
|
#
|
||||||
|
set txt(0) [exec cat $input_pathname]
|
||||||
|
set typ(0) content
|
||||||
|
set num 1
|
||||||
|
}]} {
|
||||||
|
foreach line {
|
||||||
|
""
|
||||||
|
"Parse device-tree source file and output syntax tree."
|
||||||
|
""
|
||||||
|
" usage: parse <source_file>"
|
||||||
|
""
|
||||||
|
} { puts stderr $line }
|
||||||
|
|
||||||
|
exit -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
# do not stop parsing (this variable is only used for debugging)
|
||||||
|
set stop 0
|
||||||
|
|
||||||
|
#
|
||||||
|
# Replace all '&' characters from the original input
|
||||||
|
# because they cause trouble with the regexp command.
|
||||||
|
#
|
||||||
|
regsub -all {&} $txt(0) "³" txt(0)
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Extract expression from content
|
||||||
|
#
|
||||||
|
# All expressions that occur in the token types 'typelist'
|
||||||
|
# and that match the 'subexpr' criterion get replaced in
|
||||||
|
# their original token by a reference tag and form a new
|
||||||
|
# token of the type 'newtype'.
|
||||||
|
#
|
||||||
|
# The reference is coded as §<token_type><token_id>°.
|
||||||
|
# Since the reference has the type coded in, we can
|
||||||
|
# match sub tokens of specific types via plain regular
|
||||||
|
# expressions.
|
||||||
|
##
|
||||||
|
proc extract {newtype subexpr typelist} {
|
||||||
|
global num txt typ stop
|
||||||
|
set old_num $num
|
||||||
|
|
||||||
|
if {$stop} { return }
|
||||||
|
|
||||||
|
for {set i 0} {$i < $old_num} {incr i} {
|
||||||
|
if {[lsearch $typelist $typ($i)] > -1} {
|
||||||
|
while {[regexp $subexpr $txt($i) mid]} {
|
||||||
|
|
||||||
|
# new sub text #
|
||||||
|
set typ($num) $newtype
|
||||||
|
set txt($num) $mid
|
||||||
|
|
||||||
|
# substitute expression by a reference #
|
||||||
|
regsub $subexpr $txt($i) "§$newtype$num°" txt($i)
|
||||||
|
incr num
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#####################################################
|
||||||
|
## Rules for splitting the input into its elements ##
|
||||||
|
#####################################################
|
||||||
|
|
||||||
|
#
|
||||||
|
# Starting with only the root token (content0) of the syntax tree
|
||||||
|
# containing the whole source code as one string, we extract
|
||||||
|
# typed sub tokens to partition the string into parts of distinct
|
||||||
|
# meanings (token types). In the process of subsequently
|
||||||
|
# applying extraction rules to specific token types, a syntax
|
||||||
|
# tree is formed.
|
||||||
|
#
|
||||||
|
|
||||||
|
# extract C-style line comments
|
||||||
|
extract lcomment {/\*[^\n]*?\*/} content
|
||||||
|
|
||||||
|
# extract C-style multi-line comments
|
||||||
|
extract mlcomment {/\*.*?\*/} content
|
||||||
|
|
||||||
|
# extract strings
|
||||||
|
#
|
||||||
|
# Strings may contain quoted '"' characters.
|
||||||
|
#
|
||||||
|
extract string {\"([^\"]|\")*?\"} content
|
||||||
|
|
||||||
|
# extract labels and references to labels
|
||||||
|
extract labeldef {[\w_][\w\d_-]*:} {content}
|
||||||
|
extract labelname {^[\w_][\w\d_-]*(?=(:|$))} {labeldef}
|
||||||
|
extract reflabel {³[\w_][\w\d_-]*} {content}
|
||||||
|
extract reflabelname {[\w_][\w\d_-]*$} {reflabel}
|
||||||
|
|
||||||
|
# fold parenthesis and blocks
|
||||||
|
extract parenblk {\([^()]*?\)} {content parenblk}
|
||||||
|
extract block {\{[^{}]*?\}} {content parenblk block}
|
||||||
|
extract cell {\<[^<>]*?\>} {content parenblk block}
|
||||||
|
extract openbrace "\{" block
|
||||||
|
extract closebrace "\}" block
|
||||||
|
extract openparen {\(} parenblk
|
||||||
|
extract closeparen {\)} parenblk
|
||||||
|
|
||||||
|
# property assignments
|
||||||
|
extract property {[\w_#][\w\d_\-,.+?#]*\s*=[^;]*;\n?} {block}
|
||||||
|
extract propertyname {^[\w_#][\w\d_\-,.+?#]*(?=\s*=)} {property}
|
||||||
|
extract propertyvalue {§(cell|string)\d+°} {property}
|
||||||
|
|
||||||
|
# root nodes
|
||||||
|
extract node {\s*/\s*§block\d+°;\n?} {content}
|
||||||
|
extract nodename {/(?=\s*§block)} {node}
|
||||||
|
|
||||||
|
# nodes
|
||||||
|
extract node {(/omit-if-no-ref/)?\s*(§labeldef\d+°\s*)?\s*[\w_][\w\d_\-,.+@]*\s*§block\d+°;\n?} {content block}
|
||||||
|
extract nodename {[\w_][\w\d_\-,.+@]*(?=\s*§block)} {node}
|
||||||
|
|
||||||
|
# supplemental node content provided via '&label { ... }' at the top level
|
||||||
|
extract nodesupplement {§reflabel\d+°\s*§block\d+°;\n?} {content}
|
||||||
|
|
||||||
|
|
||||||
|
###############################
|
||||||
|
## Back-end helper functions ##
|
||||||
|
###############################
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return name of reference token with specified index
|
||||||
|
##
|
||||||
|
proc token_by_idx {idx} {
|
||||||
|
global typ;
|
||||||
|
return "$typ($idx)$idx"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return index of specified reference token
|
||||||
|
##
|
||||||
|
proc idx_of_token {token} {
|
||||||
|
regexp {[0-9]+} $token idx
|
||||||
|
return $idx
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return type of specified reference token
|
||||||
|
##
|
||||||
|
proc type_of_token {token} {
|
||||||
|
regexp {[a-z]+} $token type
|
||||||
|
return $type
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return marker for reference token
|
||||||
|
##
|
||||||
|
proc marker {token} {
|
||||||
|
return "§$token°"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Return text referenced by token
|
||||||
|
##
|
||||||
|
proc token_text {token} {
|
||||||
|
global txt
|
||||||
|
return $txt([idx_of_token $token])
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Assign a line number to each reference token
|
||||||
|
#
|
||||||
|
# To be able to provide error messages including line numbers, we
|
||||||
|
# determine the line number for each reference token and store it
|
||||||
|
# as an attribute.
|
||||||
|
#
|
||||||
|
# The result of the function is stored in the global 'ln' array.
|
||||||
|
##
|
||||||
|
proc assign_line_numbers {{token content0}} {
|
||||||
|
global ln curr_ln
|
||||||
|
|
||||||
|
if {$token == "content0"} { set curr_ln 1 }
|
||||||
|
|
||||||
|
# assign current line number to current token
|
||||||
|
set ln([idx_of_token $token]) $curr_ln
|
||||||
|
|
||||||
|
# count occurrences of line breaks
|
||||||
|
if {[type_of_token $token] == "line"} { incr curr_ln }
|
||||||
|
if {$token == "\n"} { incr curr_ln }
|
||||||
|
|
||||||
|
# count lines for all sub-tokens
|
||||||
|
set tex [token_text $token]
|
||||||
|
while {$tex != ""} {
|
||||||
|
|
||||||
|
# count and eat raw line breaks (needed if 'whitespace' option is disabled)
|
||||||
|
if {[regexp {^\n} $tex dummy]} {
|
||||||
|
incr curr_ln
|
||||||
|
regsub {\n} $tex "" tex
|
||||||
|
}
|
||||||
|
|
||||||
|
# ignore plain text
|
||||||
|
if {[regexp {^[^§\n]+} $tex plain]} {
|
||||||
|
regsub {^[^§\n]+} $tex "" tex }
|
||||||
|
|
||||||
|
# traverse into token
|
||||||
|
if {[regexp {^§(.+?)°} $tex dummy token]} {
|
||||||
|
assign_line_numbers $token
|
||||||
|
regsub {§(.+?)°} $tex "" tex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Look up line number of specified reference token
|
||||||
|
##
|
||||||
|
proc line_number {token} {
|
||||||
|
global ln
|
||||||
|
return $ln([idx_of_token $token])
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Output tokens as valid Tcl List
|
||||||
|
#
|
||||||
|
# The result of this function can be used directly
|
||||||
|
# as input by another Tcl script.
|
||||||
|
##
|
||||||
|
proc dump_tokens { } {
|
||||||
|
global num typ txt
|
||||||
|
set tokens [list]
|
||||||
|
|
||||||
|
for {set i 0} {($i < $num)} {incr i} {
|
||||||
|
set token [token_by_idx $i]
|
||||||
|
set text $txt($i)
|
||||||
|
|
||||||
|
lappend tokens [list $token [line_number $token] $text]
|
||||||
|
}
|
||||||
|
puts $tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
##################
|
||||||
|
## Main program ##
|
||||||
|
##################
|
||||||
|
|
||||||
|
assign_line_numbers
|
||||||
|
|
||||||
|
dump_tokens
|
Loading…
x
Reference in New Issue
Block a user