genode/tool/dts/parse
Norman Feske 7ce734d886 tool/dts: accept 'label: &ref { };' syntax
The combination of a label with a node supplement was not yet covered by
the dts/parse tool so far.

Fixes #4962
2023-07-14 12:06:32 +02:00

262 lines
5.8 KiB
Tcl
Executable File

#!/usr/bin/tclsh
#
# \brief Regular-expression-based device-tree source parser
# \author Norman Feske
# \date 2021-05-14
#
# read file
set input_pathname [lindex $argv 0]
if {[catch {
#
# Create root node of the syntax tree
#
set txt(0) [exec cat $input_pathname]
set typ(0) content
set num 1
}]} {
foreach line {
""
"Parse device-tree source file and output syntax tree."
""
" usage: parse <source_file>"
""
} { puts stderr $line }
exit -1;
}
# do not stop parsing (this variable is only used for debugging)
set stop 0
#
# Replace all '&' characters from the original input
# because they cause trouble with the regexp command.
#
regsub -all {&} $txt(0) "³" txt(0)
##
# Extract expression from content
#
# All expressions that occur in the token types 'typelist'
# and that match the 'subexpr' criterion get replaced in
# their original token by a reference tag and form a new
# token of the type 'newtype'.
#
# The reference is coded as §<token_type><token_id>°.
# Since the reference has the type coded in, we can
# match sub tokens of specific types via plain regular
# expressions.
##
proc extract {newtype subexpr typelist} {
global num txt typ stop
set old_num $num
if {$stop} { return }
for {set i 0} {$i < $old_num} {incr i} {
if {[lsearch $typelist $typ($i)] > -1} {
while {[regexp $subexpr $txt($i) mid]} {
# new sub text #
set typ($num) $newtype
set txt($num) $mid
# substitute expression by a reference #
regsub $subexpr $txt($i) "§$newtype$num°" txt($i)
incr num
}
}
}
}
#####################################################
## Rules for splitting the input into its elements ##
#####################################################
#
# Starting with only the root token (content0) of the syntax tree
# containing the whole source code as one string, we extract
# typed sub tokens to partition the string into parts of distinct
# meanings (token types). In the process of subsequently
# applying extraction rules to specific token types, a syntax
# tree is formed.
#
# extract C-style line comments
extract lcomment {/\*[^\n]*?\*/} content
# extract C-style multi-line comments
extract mlcomment {/\*.*?\*/} content
# extract strings
#
# Strings may contain quoted '"' characters.
#
extract string {\"([^\"]|\")*?\"} content
# extract labels and references to labels
extract labeldef {[\w_][\w\d_-]*:} {content}
extract labelname {^[\w_][\w\d_-]*(?=(:|$))} {labeldef}
extract reflabel {³[\w_][\w\d_-]*} {content}
extract reflabelname {[\w_][\w\d_-]*$} {reflabel}
# fold parenthesis and blocks
extract parenblk {\([^()]*?\)} {content parenblk}
extract block {\{[^{}]*?\}} {content parenblk block}
extract cell {\<[^<>]*?\>} {content parenblk block}
extract openbrace "\{" block
extract closebrace "\}" block
extract openparen {\(} parenblk
extract closeparen {\)} parenblk
# property assignments
extract property {[\w_#][\w\d_\-,.+?#]*\s*=[^;]*;\n?} {block}
extract propertyname {^[\w_#][\w\d_\-,.+?#]*(?=\s*=)} {property}
extract propertyvalue {§(cell|string)\d+°} {property}
# root nodes
extract node {\s*/\s*§block\d+°;\n?} {content}
extract nodename {/(?=\s*§block)} {node}
# nodes
extract node {(/omit-if-no-ref/)?\s*(§labeldef\d+°\s*)?\s*[\w_][\w\d_\-,.+@]*\s*§block\d+°;\n?} {content block}
extract nodename {[\w_][\w\d_\-,.+@]*(?=\s*§block)} {node}
# supplemental node content provided via '&label { ... }' at the top level
extract nodesupplement {(§labeldef\d+°\s*)?§reflabel\d+°\s*§block\d+°;\n?} {content}
###############################
## Back-end helper functions ##
###############################
##
# Return name of reference token with specified index
##
proc token_by_idx {idx} {
global typ;
return "$typ($idx)$idx"
}
##
# Return index of specified reference token
##
proc idx_of_token {token} {
regexp {[0-9]+} $token idx
return $idx
}
##
# Return type of specified reference token
##
proc type_of_token {token} {
regexp {[a-z]+} $token type
return $type
}
##
# Return marker for reference token
##
proc marker {token} {
return "§$token°"
}
##
# Return text referenced by token
##
proc token_text {token} {
global txt
return $txt([idx_of_token $token])
}
##
# Assign a line number to each reference token
#
# To be able to provide error messages including line numbers, we
# determine the line number for each reference token and store it
# as an attribute.
#
# The result of the function is stored in the global 'ln' array.
##
proc assign_line_numbers {{token content0}} {
global ln curr_ln
if {$token == "content0"} { set curr_ln 1 }
# assign current line number to current token
set ln([idx_of_token $token]) $curr_ln
# count occurrences of line breaks
if {[type_of_token $token] == "line"} { incr curr_ln }
if {$token == "\n"} { incr curr_ln }
# count lines for all sub-tokens
set tex [token_text $token]
while {$tex != ""} {
# count and eat raw line breaks (needed if 'whitespace' option is disabled)
if {[regexp {^\n} $tex dummy]} {
incr curr_ln
regsub {\n} $tex "" tex
}
# ignore plain text
if {[regexp {^[^§\n]+} $tex plain]} {
regsub {^[^§\n]+} $tex "" tex }
# traverse into token
if {[regexp {^§(.+?)°} $tex dummy token]} {
assign_line_numbers $token
regsub {§(.+?)°} $tex "" tex
}
}
}
##
# Look up line number of specified reference token
##
proc line_number {token} {
global ln
return $ln([idx_of_token $token])
}
##
# Output tokens as valid Tcl List
#
# The result of this function can be used directly
# as input by another Tcl script.
##
proc dump_tokens { } {
global num typ txt
set tokens [list]
for {set i 0} {($i < $num)} {incr i} {
set token [token_by_idx $i]
set text $txt($i)
lappend tokens [list $token [line_number $token] $text]
}
puts $tokens
}
##################
## Main program ##
##################
assign_line_numbers
dump_tokens