genode/tool/beautify

1771 lines
47 KiB
Plaintext
Raw Normal View History

2011-12-22 15:19:25 +00:00
#!/usr/bin/tclsh
#
# \brief C++-coding-style checking tool
# \author Norman Feske
# \date 2007-08-17
#
# check command line arguments (read input filename and remove argument
regexp -- {-tokens +([^\s]+)} $argv dummy input_token_file
regsub -- {-tokens +([^\s]+)} $argv "" argv]
set config_fix [regexp -- {-fix\M} $argv dummy]
regsub -- {-fix\M} $argv "" argv]
set config_write [regexp -- {-write\M} $argv dummy]
regsub -- {-write\M} $argv "" argv]
#################################################
## Read input and fill internal representation ##
#################################################
##
# Find location of 'parse_cxx'
#
# We expect to find 'parse_cxx' in the same directory
# as we are located. The question is: Where are we?
##
proc parse_cxx_file { } {
global argv0
set path $argv0
if {[file type $path] == "link"} {
set path [file readlink $path] }
set parse_cxx_file "[file dirname $path]/parse_cxx"
if {![file exists $parse_cxx_file]} {
puts stderr "Error: Could not find 'parse_cxx' in '$path'."
exit -1
}
return $parse_cxx_file
}
set input_source [lindex $argv end]
if {![file exists $input_source]} {
puts stderr ""
puts stderr "Check adherence to Genode's C++ coding style"
puts stderr "\n usage: beautify \[-tokens <token_file>\] \[-fix\] \[-write\] \[<source_file>\]"
puts stderr ""
puts stderr "For normal use, <source_file> is the name of the file to analyse."
puts stderr "If the '-tokens' argument is specified, the input is taken"
puts stderr "directly from a file containing a token list. This is useful"
puts stderr "for debugging."
puts stderr "By specifying the '-fix' argument, a proposed version for"
puts stderr "the source file will be written to stdout. This version may"
puts stderr "contain warning directives for manual revision."
puts stderr "If the option '-write' is specific in addition to '-fix', the"
puts stderr "fixed version of the file gets written back to the source code."
puts stderr ""
exit -1;
}
set tokens {}
if {[info exists input_token_file]} {
set tokens [exec cat $input_token_file]
} else {
if {[catch {
set tokens [exec [parse_cxx_file] -format tokens $input_source]
}]} {
puts stderr "Error: parsing the C++ file '$input_source' failed"
puts stderr "Parser output follows:\n$tokens"
}
}
foreach token $tokens {
set name [lindex $token 0]
set line [lindex $token 1]
set text [lindex $token 2]
set tok_text($name) "$text"
set tok_line($name) $line
}
if {![info exists tok_text(content0)]} {
puts stderr "Error: input contains no root token 'content0'."
exit -1
}
##
# Return current (partially corrected) code and abort
##
proc abort {{message ""}} {
global config_write
global config_fix
global input_source
if {$message != ""} { puts stderr "Aborting code check: $message" }
if {$config_fix} {
# use stdout by default
set fd stdout
# if called with '-write', write the changes back to source file
if {$config_write} {
set fd [open $input_source "WRONLY CREAT TRUNC"] }
dump_source content0 $fd
close $fd
}
exit
}
proc msg { message } { puts stderr $message }
##
# Print warning message
##
proc warn {message {token ""} {prefix Warning}} {
global tok_line
set line ""
catch { set line " at line $tok_line($token)" }
puts stderr "$prefix$line: $message"
}
##
# Print error message
##
proc error {message {token ""}} { warn $message $token Error }
##########################
## Source-code back end ##
##########################
##
# Output syntax tree as XML
##
proc dump_source {token fd} {
global tok_text
set output $tok_text($token)
while {$output != ""} {
# consume plain text
if {[regexp {^[^<5E>]+} $output plain]} {
regsub -all {<7B>} $plain "\\\&" plain
puts -nonewline $fd $plain
regsub {^[^<5E>]+} $output "" output
}
# consume token
if {[regexp {<7B>(.+?)<29>} $output dummy subtoken]} {
dump_source $subtoken $fd
regsub {<7B>(.+?)<29>} $output "" output
}
}
# append newline at end of output
if {$token == "content0"} { puts $fd "" }
}
###########################
## Syntax-tree utilities ##
###########################
##
# Select list of tokens of specified type from subtree
##
proc select_by_type {tok_type {token content0} {max_depth 999}} {
global tok_text
if {$max_depth == "0"} { return }
incr max_depth -1
set txt $tok_text($token)
set result [list]
while {$txt != ""} {
# consume plain text
if {[regexp {^[^<5E>]+} $txt plain]} {
regsub {^[^<5E>]+} $txt "" txt }
# consume token
if {[regexp {<7B>(.+?)<29>} $txt dummy subtoken]} {
if {[regexp "^$tok_type\\d+\$" $subtoken dummy]} {
lappend result $subtoken }
# traverse into subtoken
set result [concat $result [select_by_type $tok_type $subtoken $max_depth]]
# eat token
regsub {<7B>(.+?)<29>} $txt "" txt
}
}
return $result
}
##
# Return list of all tokens
##
proc select_all { } { return "content0 [select_by_type {.*}]" }
##
# Return list of tokes contained in the string
##
proc list_of_tokens { string } {
set result { }
while {[regexp {<7B>(\w+\d+)<29>} $string dummy tok]} {
lappend result $tok
regsub {<7B>(\w+\d+)<29>} $string "" string
}
return $result
}
##
# Expand all tokens of string
##
proc expand { txt } {
global tok_text
set pattern {<7B>(.+?)<29>}
while {[regexp $pattern $txt dummy token]} {
# avoid backslash to be used as re back references
regsub -all {\\} $tok_text($token) {\\\\} expanded_txt
regsub -- $pattern $txt $expanded_txt txt
}
return $txt
}
##
# Expand subtree of token
##
proc expand_token { token } {
global tok_text
return [expand $tok_text($token)]
}
##
# Return number of line breaks contained in the speficied sub tree
##
proc count_line_breaks { token } {
set txt [expand_token $token]
set num_lines 0
while {[regsub "\n" $txt "" txt]} { incr num_lines }
return $num_lines
}
##
# Return 1 if token is a leaf
##
proc is_leaf { token } {
global tok_text
if {[regexp {<7B>.*<2A>} $tok_text($token) dummy]} {
return 0 }
return 1
}
##
# Return 1 if token is at the begin of a line
##
proc is_at_begin_of_line { token } {
global tok_text
while {$token != "content0"} {
set parent_token [get_parent $token]
if {[regexp "\n\\s*<2A>$token<65>" $tok_text($parent_token) dummy]} {
return 1 }
if {[regexp "\[^\\s\].*<2A>$token<65>" "$tok_text($parent_token)" dummy]} {
return 0 }
set token $parent_token
}
return 1
}
##
# Returns 1 if token is of one of the specified types
##
proc is_type { token types } {
foreach type $types {
if {[regexp "^$type\\d+" $token dummy]} { return 1 } }
return 0
}
##
# Return next leaf token, or "" if no leaf token exists
##
proc next_leaf { token } {
global tok_text
if {[is_leaf $token]} { return $token }
foreach tok [list_of_tokens $tok_text($token)] {
set leaf_token [next_leaf $tok]
if {$leaf_token != ""} { return $leaf_token }
}
return ""
}
##
# Convert string into a string containing only indentation and alignment
# characters
##
proc indent_filter { string } {
# regsub -all {[^\s]} $string " " string
return $string
}
##
# Return indentation and alignment of leaf token
##
proc indent_of {token} {
global tok_text
set leaf_indent ""
if {[is_leaf $token]} {
regexp {^([\t ]*)} $tok_text($token) dummy leaf_indent }
set parent [get_parent $token]
if {$parent == ""} {return ""}
set parent_text $tok_text($parent)
# expand all characters in front of our token
set prev ""
regexp "^(.*)<29>$token<65>" $parent_text dummy prev
set expanded_indent [expand $prev]
# if the expanded part of the parent token contains the newline, we are done
if {[regexp {\n} $expanded_indent dummy]} {
regexp {[^\n]*$} $expanded_indent indent_after_newline
return [indent_filter "$indent_after_newline$leaf_indent"] }
# keep searching for newline at the parent
return [indent_filter [indent_of $parent]$expanded_indent$leaf_indent]
}
##
# Return alignment level of token
##
proc align_level_of { token } {
regexp {[^\t]*$} [indent_of [next_leaf $token]] alignment
return [string length $alignment]
}
##
# Return indentation level of token
##
proc indent_level_of { token } {
regexp {^\t*} [indent_of [next_leaf $token]] indentation
return [string length $indentation]
}
proc hpos { token } {
return [expr [indent_level_of $token]*4 + [align_level_of $token]] }
##
# Find parent by depth search
##
proc get_parent {token} {
global tok_parent
if {[info exists tok_parent($token)] == 0} { return "" }
return $tok_parent($token)
}
##
# Return 1 if token in located within compound token
##
proc is_within { token compound } {
while {1} {
set parent [get_parent $token]
if {$parent == ""} { return 0 }
if {[is_type $parent $compound]} { return 1 }
set token $parent
}
}
##
# Return characters between token and next newline
##
proc trailing_chars_after { token } {
global tok_text
set parent [get_parent $token]
if {$parent == ""} { return "" }
regexp "<22>$token<65>(.*)\$" $tok_text($parent) dummy chars_after_token
# if parent has newline character after token, return number of characters until newline
if {[regexp "^(.*?)\\n" [expand $chars_after_token] dummy chars_until_newline]} {
return "$chars_until_newline" }
return "[expand $chars_after_token][trailing_chars_after $parent]"
}
##
# Calculate length of token, if merged on a single line
##
proc calc_merged_line_length { token } {
global tok_text
set string $tok_text($token)
# remove leading comment
regsub {^\s*<2A>m?lcomment\d+<2B>\n} $string "" string
set expanded [expand $string]
#
# Check if the entire token fits plus the tokens until the next newline
# fit on a single line
#
set token_len [string length $expanded]
set trailing_len [string length [trailing_chars_after $token]]
return [expr [hpos $token] + $token_len + $trailing_len]
}
############################
## Transformation helpers ##
############################
##
# Strip leading whitespace from first argument of a parenthesis block
#
# The parenthesis block can be a 'parenblk' or 'argparenblk'
##
proc strip_whitespace_from_first_argument { par_token } {
global tok_text
# tolerate newline directly after the parenthesis in function-call argument lists
if {[regexp {<7B>openparen\d+<2B>\n} $tok_text($par_token)] &&
[is_type [get_parent $par_token] function]} return
# strip whitespace residing in the parenthesis token
regsub {(<28>openparen\d+<2B>)\s+} $tok_text($par_token) "\\1" tok_text($par_token)
#
# Further whitespace can be present within the first argument token.
# The par_token consists of 'openparen', arguments, and 'closeparen'.
# The arguments may be strings or identifiers. We just grab the
# second list element and squeeze it.
#
set firstarg_token [lindex [list_of_tokens $tok_text($par_token)] 1]
if {[is_at_begin_of_line $firstarg_token]} return
set firstarg_token [next_leaf $firstarg_token]
regsub {^[\t ]+} $tok_text($firstarg_token) "" tok_text($firstarg_token)
foreach tok [select_by_type {.*} $firstarg_token 1] {
if {[is_type $tok string]} continue
regsub -all {[\t ]+} $tok_text($tok) "" tok_text($tok) }
}
proc strip_whitespace_from_last_argument { par_token } {
global tok_text
# strip whitespace residing in the parenthesis token
regsub {[\t ]+(<28>closeparen\d+<2B>)} $tok_text($par_token) "\\1" tok_text($par_token)
}
##
# Remove newline in front of specified token
#
# The argument must be a leaf token.
##
proc remove_newline_in_front_of { token {replacement " "} } {
global config_fix
global tok_text
if {$config_fix == 0} return
if {![is_at_begin_of_line $token]} return
# remove any alignment spaceing from token
regsub {^[\t ]+} $tok_text($token) "" tok_text($token)
set parent [get_parent $token]
if {$parent == ""} return
# remove leading non-newline whitespace from parent's token
regsub "\[\\t \]+(<28>$token<65>)" $tok_text($parent) "\\1" tok_text($parent)
# look out for the newline to eliminate, replace it
set pattern "\[\\t \]*\n\[\\t \]*(<28>$token<65>)"
if {[regexp $pattern $tok_text($parent) dummy]} {
regsub $pattern $tok_text($parent) "$replacement\\1" tok_text($parent)
return
}
# if there is no newline character at the parent, continue at the parent
if {[regexp "^<5E>$token<65>" $tok_text($parent) dummy]} {
remove_newline_in_front_of $parent $replacement }
}
proc reindent { token } {
global indent_level
global align_level
set indent_level [indent_level_of $token]
set align_level [align_level_of $token]
indent_block $token
}
#######################
## Debugging support ##
#######################
proc dump_class_elems {class_token elem_type} {
global tok_text
set elem_tokens [select_by_type $elem_type $class_token 3]
if {![llength $elem_tokens]} { return }
puts stderr " $elem_type ([llength $elem_tokens]):"
foreach elem_token $elem_tokens {
set function_token [lindex [select_by_type function $elem_token] 0]
set name_token [lindex [select_by_type identifier $function_token] 0]
puts stderr " $tok_text($name_token)"
}
}
### PRINT INFORMATION ABOUT THE CLASSES AND THEIR MEMBER FUNCTIONS ###
proc dump_classes { } {
global tok_text
foreach class_token [select_by_type class] {
set name_token [lindex [select_by_type identifier $class_token] 0]
puts stderr "class $tok_text($name_token):"
foreach elem {constdecl constimpl destdecl destimpl funcdecl funcimpl} {
dump_class_elems $class_token $elem }
}
}
###################
## Preprocessing ##
###################
#
# Determine the parent of each token
#
foreach parent_tok [select_all] {
foreach tok [list_of_tokens $tok_text($parent_tok)] {
set tok_parent($tok) $parent_tok } }
#
# Transform syntax tree such that all indentations are located at
# leaf nodes. However, do not touch preprocessor macros. We won't
# attempt to reindent those.
#
foreach tok [select_all] {
if {[is_type $tok preproc]} continue
if {[is_leaf $tok] && [is_at_begin_of_line $tok]} {
set indent [indent_of $tok]
# remove remove whitespace from the tokens in front of the leaf
remove_newline_in_front_of $tok "\n"
# add indentation at the begin of the leaf token
regsub {^[\t ]*} $tok_text($tok) "$indent" tok_text($tok)
}
}
#########################
## Coding-style checks ##
#########################
#
# There are the following classes of complaints:
#
# Error - Intervention by the user is required,
# The style-checking process stops.
#
# Warning - Style conflicts that can be fixed automatically.
# The style-checking process continues.
#
# Suggestion - Non-essential style suggestions that can be followed
# automatically.
#
#
# Checking valid uses of preprocessor commands
#
msg "Checking preprocessor commands..."
set abort_after_preproc_check 0
foreach tok [select_by_type preproc] {
set parent_tok [get_parent $tok]
if {[is_type $parent_tok content]} continue
if {[is_type $parent_tok block]} {
if {[is_type [get_parent $parent_tok] namespaceblock]} continue
if {[is_type [get_parent $parent_tok] externcblk]} continue
}
error "preprocessor command not allowed at this location" $tok
set abort_after_preproc_check 1
}
if {$abort_after_preproc_check} {
error "further processing not possible because of errors above"
abort
}
#
# Check for C++ comments
#
msg "Checking absence of C++ comments..."
foreach tok [select_by_type cxxcomment] {
puts stderr "Warning at line $tok_line($tok): remove C++ comments, use /* traditional */ comment style"
}
#
# Perform indentation
#
msg "Checking indentation..."
##
# Generate string containing the white space for the given indentation and
# alignment
##
proc gen_indent { indent_level align_level } {
set indent ""
for { set i 0 } { $i < $indent_level } { incr i } { append indent "\t" }
for { set i 0 } { $i < $align_level } { incr i } { append indent " " }
return $indent;
}
##
# Indent multi-line comment
#
proc indent_mlcomment { mlcomment_token indent_level } {
global tok_text
set result {}
set cnt 0
foreach line [split $tok_text($mlcomment_token) "\n"] {
set alignment 0
if {$cnt > 0} { set alignment 1 }
set indentation [gen_indent $indent_level $alignment]
# handle single-line-style comments following code, i.e, a variable declaration
if {[is_at_begin_of_line $mlcomment_token] == 0} {
if {$cnt == 0} {
regexp {^[\t ]*} $line indentation
} else {
set align_level [expr [align_level_of $mlcomment_token] + 3]
set indentation [gen_indent [indent_level_of $mlcomment_token] $align_level]
}
}
regsub {^\s*(.*)$} $line "$indentation\\1" line
incr cnt
lappend result $line
}
return [join $result "\n"]
}
set alignment_stack {}
proc push_alignment { align_level } {
global alignment_stack
lappend alignment_stack $align_level
}
proc pop_alignment { } {
global alignment_stack
set top [lindex $alignment_stack end]
set alignment_stack [lreplace $alignment_stack end end]
return $top
}
##
# Return true if statement is a non-block single statement to be indented
##
proc is_single_indented_statement { token } {
global tok_text
if {![is_at_begin_of_line [next_leaf $token]]} { return 0 }
if {![is_type $token statement] && ![is_type $token lcomment]} { return 0 }
# indent line comment only if it is the first of its parent
if {[is_type $token lcomment]} {
if {[regexp "<22>$token<65>\\s*<2A>keyelse\\d+<2B>" $tok_text([get_parent $token]) dummy]} {
return 0 } }
# child of statement must not be a block
if {[llength [select_by_type block $token 1]] > 0} { return 0 }
# parent must be 'if', 'ifelse', or 'for' token
if {[is_type [get_parent $token] {if while ifelse for}]} { return 1 }
return 0
}
proc is_indented_assignment { block_token } {
global tok_text
if {[llength [select_by_type {assign\w*} $block_token 1]] == 0} { return 0 }
return [regexp {<7B>assign\w*\d+<2B>[\t ]*\n} $tok_text($block_token) dummy]
}
proc first_init_elem { initializer_token } {
global tok_text
return [lindex [list_of_tokens $tok_text($initializer_token)] 1]
}
proc is_normal_assignment { block_token } {
foreach compound {argparenblk enum virtassign} {
if {[is_type $block_token $compound] || [is_within $block_token $compound]} {
return 0 } }
if {[llength [select_by_type {assign\w*} $block_token 1]] > 0} { return 1 }
return 0
}
proc is_followed_by_space { token } {
global tok_text
return [regexp "<22>$token<65>\[\t \]" $tok_text([get_parent $token]) dummy]
}
proc is_indented_parenblk { parenblk } {
global tok_text
if {[is_type $parenblk parenblk] == 0} { return 0 }
# if the opening parenthesis is followed by a newline, we indent the content
if {[regexp {<7B>openparen\d+<2B>\n} $tok_text($parenblk) dummy]} { return 1 }
return 0
}
proc is_comment_within_switch { token } {
if {[is_type $token {lcomment mlcomment}] &&
[is_type [get_parent $token] codeseq] &&
[is_type [get_parent [get_parent $token]] block] &&
[is_type [get_parent [get_parent [get_parent $token]]] statement] &&
[is_type [get_parent [get_parent [get_parent [get_parent $token]]]] switch]} { return 1 }
return 0
}
proc indent_block { block_token } {
global indent_level
global align_level
global tok_text
global tok_line
global config_fix
global depth
if {[is_type $block_token mlcomment]} {
if {$config_fix} {
set tok_text($block_token) [indent_mlcomment $block_token $indent_level]
}
return
}
# preprocessor macros are never indented
if {[is_type $block_token preproc]} {
if {$config_fix} {
set tok [next_leaf $block_token]
regsub {^\s*} $tok_text($tok) "" tok_text($tok)
}
return
}
if {[is_leaf $block_token] && [is_at_begin_of_line $block_token]} {
set curr_indent ""
regexp {^[\t ]*} $tok_text($block_token) curr_indent
set expected_indent [gen_indent $indent_level $align_level]
# preserve alignment of certain operators
if {[is_type $block_token {lshift rshift plus minus}]} {
set expected_indent [gen_indent $indent_level [align_level_of $block_token]]
}
if {$curr_indent != $expected_indent} {
set align_msg ""
if {$align_level > 0} {
set align_msg " and $align_level spaces" }
warn "wrong indentation, please change to $indent_level tabs$align_msg" $block_token
regsub {^[\t ]+} $tok_text($block_token) [gen_indent $indent_level $align_level] tok_text($block_token)
}
return
}
#
# Process non-leaf token
#
if {[is_type $block_token {protected private public}]} { incr indent_level }
if {[is_type $block_token inherit]} {
push_alignment $align_level
set align_level [expr [align_level_of [next_leaf $block_token]] + 2]
}
foreach tok [select_by_type {.*} $block_token 1] {
if {[is_type $tok {"assign\\w*"}] && [is_normal_assignment $block_token]} {
push_alignment $align_level
if {[is_indented_assignment $block_token]} {
incr indent_level
set align_level 0
} else {
set align_level [expr [align_level_of $tok] + 2]
}
}
if {[is_type $tok typename] && [is_type $block_token typedef]} {
push_alignment $align_level
set align_level 8
}
if {[is_type $tok closeparen]} {
if {[is_indented_parenblk $block_token]} { incr indent_level -1 }
set align_level [pop_alignment]
}
if {[is_type $tok closebrace]} { incr indent_level -1 }
if {[is_type $tok {label caselabel publiclabel privatelabel protectedlabel}]} { incr indent_level -1 }
if {[is_comment_within_switch $tok]} { incr indent_level -1 }
if {[is_single_indented_statement $tok]} { incr indent_level }
if {[is_at_begin_of_line $tok] && [is_followed_by_space $tok]} {
if {[is_type $tok {or and}]} { incr align_level -3}
if {[is_type $tok {bitor amper plus minus question}]} { incr align_level -2}
}
indent_block $tok
if {[is_type $tok identifier] && [is_type $block_token vardecl]} {
# handle only the first identifier of a vardecl
if {[lindex [list_of_tokens $tok_text($block_token)] 0] == $tok} {
push_alignment $align_level
#
# Align subsequent lines of variable declaration at the
# second identifier, which should be the name of the
# first variable.
#
set second_token [lindex [list_of_tokens $tok_text($block_token)] 1]
set align_level [align_level_of [next_leaf $second_token]]
}
}
if {[is_at_begin_of_line $tok] && [is_followed_by_space $tok]} {
if {[is_type $tok {or and}]} { incr align_level 3}
if {[is_type $tok {bitor amper plus minus}]} { incr align_level 2}
}
if {[is_type $tok keyreturn]} {
push_alignment $align_level
set align_level 7
}
if {[is_single_indented_statement $tok]} { incr indent_level -1 }
if {[is_type $tok {label caselabel publiclabel privatelabel protectedlabel}]} { incr indent_level }
if {[is_comment_within_switch $tok]} { incr indent_level }
if {[is_type $tok openbrace]} { incr indent_level }
if {[is_type $tok openparen]} {
push_alignment $align_level
if {[is_indented_parenblk $block_token]} {
set align_level 0
incr indent_level
} else {
set align_level [expr [align_level_of $tok] + 1]
}
}
if {[is_type $tok colon] && [is_type $block_token initializer]} {
if {[align_level_of [first_init_elem $block_token]] > 0} {
push_alignment $align_level
set align_level [align_level_of [first_init_elem $block_token]]
} else {
incr indent_level
}
}
}
if {[is_type $block_token {protected private public}]} { incr indent_level -1 }
if {[is_type $block_token {inherit typedef return vardecl}]} { set align_level [pop_alignment] }
if {[is_normal_assignment $block_token]} {
#
# one block token may contain multiple assign tokens, revert
# indentation for each of them
#
foreach assign [select_by_type {assign\w*} $block_token] {
set align_level [pop_alignment]
if {[is_indented_assignment $block_token]} { incr indent_level -1 }
}
}
if {[is_type $block_token initializer]} {
if {[align_level_of [first_init_elem $block_token]] > 0} {
set align_level [pop_alignment]
} else {
incr indent_level -1
}
}
}
set indent_level 0
set align_level 0
indent_block content0
#abort
msg "Checking file header..."
#
# Check empty space in front of file header
#
if {[regexp {^\s+} $tok_text(content0) dummy]} {
warn "empty space before file header at the beginning of the file"
if {$config_fix} {
regsub {^\s+} $tok_text(content0) "" tok_text(content0) }
}
#
# Check existence of file header
#
proc file_header_template { } {
set res ""
append res "/*\n"
append res " * \\brief Brief description of the file\n"
append res " * \\author Name of Author\n"
append res " * \\date [exec date --iso-8601]\n"
append res " */\n"
append res "#warning Style: please fill out the file header and remove this warning\n\n"
return $res
}
if {![regexp {^<5E>mlcomment\d+<2B>} $tok_text(content0) dummy]} {
error "expect file header at the beginning of the file"
if {$config_fix} {
regsub {^(<28>(tab|space|align|line)\d+<2B>)*} $tok_text(content0) [file_header_template] tok_text(content0) }
abort
}
#
# Inspect file header
#
proc check_author { header_line } {
# author
if {[regexp {^ \* \\author([\t ]+)([^\t ].*)} $header_line dummy align author] == 0} {
warn "malformed author field in file header"
return
}
if {[string length $author] < 2} {
warn "incomplete author field in file header"
}
if {$align != " "} {
warn "unaligned author field in file header"
set align " "
}
if {[string is lower [string index $author 0]]} {
warn "author name should start with an upper-case character"
set author [string toupper $author 0 0]
}
return " * \\author$align$author"
}
proc check_file_header { } {
global tok_text
global config_fix
set header_token [lindex [list_of_tokens $tok_text(content0)] 0]
set header [expand_token $header_token]
set header_lines [split $header "\n"]
set cnt 0
# first line
if {[lindex $header_lines 0] != "/*"} {
warn "malformed first line of file header"
lset header_lines 0 "/*"
}
# brief description
if {[regexp {^ \* \\brief(\s+)([^\s].*)} [lindex $header_lines 1] dummy align desc] == 0} {
warn "malformed brief description in file header"
return
}
if {[string length $desc] < 2} {
warn "incomplete brief description in file header"
}
if {$align != " "} {
warn "unaligned brief description in file header"
set align " "
}
if {[string is lower [string index $desc 0]]} {
warn "brief description of file header should start with an upper-case character"
set desc [string toupper $desc 0 0]
}
lset header_lines 1 " * \\brief$align$desc"
set line_cnt 2
while {[regexp {\\author} [lindex $header_lines $line_cnt] dummy]} {
lset header_lines $line_cnt [check_author [lindex $header_lines $line_cnt]]
incr line_cnt
}
# date
if {[regexp {^ \* \\date([\t ]+)([^\t ].*)} [lindex $header_lines $line_cnt] dummy align date] == 0} {
warn "malformed date field in file header"
return
}
if {[regexp {^\d\d\d\d-\d\d-\d\d$} $date dummy] == 0} {
warn "date in file header not in format YYYY-MM-DD"
# attempt to convert german date to YYYY-MM-DD
if {[regexp {(\d\d)\.(\d\d)\.(\d\d\d\d)} $date dummy day month year]} {
if {$month < 13} { set date "$year-$month-$day" }
}
}
if {$align != " "} {
warn "unaligned date field in file header"
set align " "
}
lset header_lines $line_cnt " * \\date$align$date"
incr line_cnt
if {$config_fix} {
set tok_text($header_token) [join $header_lines "\n"] }
}
check_file_header
#
# Check for single empty line after file header
#
if {[regexp {^<5E>mlcomment\d+<2B>\n\n\n+} $tok_text(content0) dummy]} {
warn "expect single empty line after file header"
if {$config_fix} {
regsub {^(<28>mlcomment\d+<2B>\n)\n+} $tok_text(content0) "\\1\n" tok_text(content0) }
}
msg "Checking names of classes, functions, and variables..."
#
# Private members must start with an underscore
#
proc vardecl_name { vardecl_token } {
global tok_text
if {[is_type $vardecl_token vardecl] == 0} return ""
# the variable name is the second identifier of the vardecl token
set name [lindex [select_by_type identifier $vardecl_token 1] 1]
if {$name == ""} { return "" }
return $tok_text($name)
}
proc funcsignature_name { funcsignature_token } {
global tok_text
if {[is_type $funcsignature_token funcsignature] == 0} return ""
return $tok_text([lindex [select_by_type identifier $funcsignature_token 1] 0])
}
foreach prot_type {private protected} {
foreach prot [select_by_type $prot_type] {
foreach vardecl [select_by_type vardecl $prot 2] {
if {[regexp {^_} [vardecl_name $vardecl]]} continue
error "$prot_type member variable name '[vardecl_name $vardecl]' should start with an underscore" $vardecl
}
foreach funcsignature [select_by_type funcsignature $prot 3] {
if {[regexp {^_} [funcsignature_name $funcsignature]]} continue
if {[is_type [get_parent $funcsignature] {constdecl constimpl destdecl destimpl}]} continue
error "$prot_type member function name '[funcsignature_name $funcsignature]' should start with an underscore" $funcsignature
}
}
}
#
# Variables and functions must not contain upper-case characters
#
foreach vardecl [select_by_type vardecl] {
if {[regexp {[A-Z]} [vardecl_name $vardecl] dummy]} {
error "variable name must not contain upper-case characters" $vardecl } }
# do not look at funcsignature tokens of constructors and destructors
foreach func_type "[select_by_type funcdecl] [select_by_type funcimpl]" {
foreach funcsignature [select_by_type funcsignature $func_type 1] {
if {[regexp {[A-Z]} [funcsignature_name $funcsignature] dummy]} {
error "function name must not contain upper-case characters" $funcsignature } }
}
msg "Checking for superfluous semicolons..."
#
# Check for non-needed trailing semicolons
#
foreach tok "content0 [select_by_type namespaceblock]" {
set txt $tok_text($tok)
while {[regexp {<7B>(namespace\d+)<29>\s*;+} $txt dummy ns]} {
set ns_name [select_by_type identifier $ns 1]
warn "unneeded trailing ';' after name-space block of '$tok_text($ns_name)'"
regsub {(<28>namespace\d+<2B>)\s*;+} $txt {\1} txt
}
if {$config_fix} {
set tok_text($tok) $txt }
}
foreach tok_type {funcimpl constimpl destimpl} {
foreach tok [select_by_type $tok_type] {
if {[regexp {[\s;]+$} $tok_text($tok) dummy] == 0} continue
warn "superfluous characters after implementation block" $tok
if {$config_fix} {
regsub {[\s;]+$} $tok_text($tok) "" tok_text($tok)
}
}
}
#
# Check proper use of single spaces
#
msg "Checking for correct spacing..."
proc check_single_spaces {env_token
tok_type tok_type_name
tok_type_next tok_type_name_next
{error_message "Getting confused"}} {
global tok_text config_fix
if {[regexp "<22>($tok_type\\d+)<29>(\\s*)<29>($tok_type_next\\d+)<29>" $tok_text($env_token) dummy tok ws next_token]} {
if {$ws != " "} {
warn "Use single space between $tok_type_name and $tok_type_name_next" $tok
if {$config_fix} {
regsub "(<28>$tok_type\\d+<2B>)\\s*(<28>$tok_type_next\\d+<2B>)" $tok_text($env_token) "\\1 \\2" tok_text($env_token) }
}
} else {
abort "$error_message at line $tok_line($env_token)" }
}
# between namespace name and opening brace
foreach tok [select_by_type namespace] {
set namespace_name [expand_token [lindex [select_by_type identifier $tok] 0]]
check_single_spaces $tok identifier "namespace name \"$namespace_name\"" \
namespaceblock "opening brace" \
"Invalid namespace definition"
}
# no space after function names
foreach tok_type {function funcsignature funcptr destfunction} {
foreach tok [select_by_type $tok_type] {
set pattern {\s+<2B>((parenblk|argparenblk)\d+)<29>}
if {[regexp $pattern $tok_text($tok) dummy paren_token dummy] == 0} continue
warn "superfluous space between function name and parenthesis" $tok
if {$config_fix} {
regsub $pattern $tok_text($tok) "<22>\\1<>" tok_text($tok)
set leaf [next_leaf $paren_token]
regsub {^\s*} $tok_text($leaf) "" tok_text($leaf)
}
#
# We change the indentation of an open parenthesis,
# so we must reindent the whole block.
#
reindent $paren_token
}
}
# superfluous space within a parenthesis block
foreach par_type {parenblk argparenblk} {
foreach par_token [select_by_type $par_type] {
# tolerate extra whitspaces in for and function statements
if {[is_type [get_parent $par_token] {forcond function}]} continue
set string [expand_token $par_token]
if {[regexp {\(\s+} $string dummy]} {
warn "leading whitespace in parenthesis block" $par_token
if {$config_fix} { strip_whitespace_from_first_argument $par_token }
}
if {[regexp {\s+\)} $string dummy]} {
warn "trailing whitespace in parenthesis block" $par_token
if {$config_fix} { strip_whitespace_from_last_argument $par_token }
}
}
}
#
# Check proper use of empty lines
#
##
# Check if there is one empty line before the specified token
#
# \param min if set to 0, accept less than one line
# \parem max if set to 0, accept more than one line
##
proc check_empty_line_before {token tok_type_name {min 1} {max 1}} {
global tok_text
global config_fix
# walk up the parent tree until we see our left context
set parent_token [get_parent $token]
while {[regexp "^<5E>$token<65>" $tok_text($parent_token)]} {
set token $parent_token
set parent_token [get_parent $token]
if {$parent_token == ""} return
}
set parent_text $tok_text($parent_token)
if {$max && [regexp "\n\n\n<>$token<65>" $parent_text dummy]} {
warn "expected only one empty line before $tok_type_name" $token
regsub "\n+<2B>$token<65>" $parent_text "\n\n<>$token<65>" parent_text
}
if {$min && [regexp "\[^\n\]\n<>$token<65>" $parent_text dummy]} {
warn "missing one empty line before $tok_type_name" $token
regsub "\n+<2B>$token<65>" $parent_text "\n\n<>$token<65>" parent_text
}
if {$config_fix} { set tok_text($parent_token) $parent_text }
}
proc check_empty_line_after {env_token tok_type tok_type_name} {
global tok_text config_fix
set txt $tok_text($env_token)
while {[regexp "<22>($tok_type\\d+)<29>((\[^\n\])|(\n\[^\n\])|(\n{3}))" $txt dummy tok next]} {
warn "Expect one empty line after $tok_type_name" $tok
regsub "(<28>$tok_type\\d+<2B>)(\n)*" $txt "\\1\n\n" txt
}
if {$config_fix} {
set tok_text($env_token) $txt }
}
# empty line after protection-scope labels
foreach p {public private protected} {
foreach tok [select_by_type $p] {
check_empty_line_after $tok label "\"$p:\" label" } }
# empty line before protection-scope labels
foreach p {public private protected} {
foreach tok [select_by_type $p] {
check_empty_line_before $tok "\"$p:\" label" 0 1 } }
# empty line at beginning of name space
foreach tok [select_by_type namespaceblock] {
#
# Allow for the special case of having a bunch of
# includes within a namespace, otherwise check for
# an empty line.
#
if {![regexp {^<5E>openbrace\d+<2B>\s<>preproc\d+<2B>} $tok_text($tok)]} {
if {[count_line_breaks $tok] > "1"} {
check_empty_line_after $tok openbrace "opening brace of name space" } }
}
##
# Return 1 if token is the first token of a block
##
proc is_first_token_of_block { token } {
global tok_text
foreach block_type {block parenblk classblock enumblock} {
#
# Walk up compound tokens until we find a block or
# find another token in front of us.
#
while {1} {
set parent [get_parent $token]
if {[is_type $parent $block_type]} {
#
# A block starts with a brace or parenthesis. We are interested
# in the second token.
#
if {[lindex [list_of_tokens $tok_text($parent)] 1] == $token} {
return 1 }
break
}
if {[lindex [list_of_tokens $tok_text($parent)] 0] != $token} {
break }
set token $parent
}
}
return 0
}
# empty line between member-functions
foreach tok_type { { funcdecl "function declaration" 0 }
{ funcimpl "function" 0 }
{ constdecl "constructor declaration" 0 }
{ constimpl "constructor" 0 }
{ destdecl "destructor declaration" 0 }
{ destimpl "destructor" 0 }
{ lcomment "single-line comment" 1 } } {
foreach tok [select_by_type [lindex $tok_type 0]] {
if {[is_type [get_parent $tok] namespaceblock]} continue
if {[is_type [get_parent $tok] content]} continue
if {[is_at_begin_of_line $tok] == 0} continue
if {[is_first_token_of_block $tok]} continue
set message [lindex $tok_type 1]
set test_min [lindex $tok_type 2]
check_empty_line_before $tok $message $test_min
}
}
#
# Check absence of spacing around certain binary operators
#
foreach identifier [select_by_type identifier] {
foreach operator {deref dot doublecolon} {
set string $tok_text($identifier)
# only look at binary operators
if {[regexp "\[^\\s\].*<2A>$operator\\d+<2B>.*\[^\\s\]" $string dummy] == 0} continue
# check for space left of operator
if {[regexp "\[\\t \]+<2B>$operator\\d+<2B>" $string dummy]} {
warn "superfluous whitespace in front of '->' operator" $identifier
regsub "\[\\t \]+(<28>$operator\\d+<2B>)" $string {\1} string
}
# check for space right of operator
while {[regexp -- "<22>$operator\\d+<2B>\[\\t \]+" $string dummy]} {
warn "superfluous whitespace after '->' operator" $tok
regsub -- "(<28>$operator\\d+<2B>)\[\\t \]+" $string {\1} string
}
if {$config_fix} { set tok_text($identifier) $string }
}
}
#
# Check for trailing spaces
#
msg "Checking for trailing spaces..."
foreach tok [select_all] {
set lines [split $tok_text($tok) "\n"]
if {[llength $lines] < 2} continue
set result {}
set cnt 0
set expanded ""
foreach line $lines {
append expanded [expand $line]
if {$cnt < [expr [llength $lines] - 1]} {
if {[regexp {\s+$} $line dummy]} {
set num_newlines [regexp -all {\n} $expanded dummy]
set line_num [expr $tok_line($tok) + $num_newlines + $cnt]
puts stderr "Warning at line $line_num: trailing space"
if {$config_fix} {
regsub {\s+$} $line "" line }
}
}
lappend result $line
incr cnt
}
if {$config_fix} { set tok_text($tok) [join $result "\n"]}
}
#
# Check for superfluous leading and spaces in single-line comments
#
foreach tok [select_by_type lcomment] {
if {[regexp {/\* +} $tok_text($tok)] == 0} continue
warn "Warning: superfluous leading space in comment" $tok
if {$config_fix} {
regsub {/\* +} $tok_text($tok) {/* } tok_text($tok) }
}
msg "Checking placement of braces and parenthesis..."
#
# Check for brace placement after 'if', 'for', 'switch', 'while', etc.
#
foreach block [select_by_type block] {
set openbrace [select_by_type openbrace $block 1]
if {![is_at_begin_of_line $openbrace]} continue
#
# Each block is wrapped in a statement. We have to inspect the
# parent of the statement.
#
set types_to_check {if for ifelse switch while}
if {![is_type [get_parent [get_parent $block]] $types_to_check]} {
continue }
warn "put opening brace to previous line" $block
remove_newline_in_front_of [next_leaf $block]
}
#
# Check for absence of newline between enum and opening brace
#
foreach enumblock [select_by_type enumblock] {
if {![is_at_begin_of_line $enumblock]} continue
warn "put opening brace to previous line" $enumblock
remove_newline_in_front_of [next_leaf $enumblock]
}
#
# Check for single space after keywords 'if', 'for', 'switch', 'while'
#
foreach type {if for while switch catch} {
set tok_type "cond"
set tok_type "$type$tok_type"
foreach tok [select_by_type $tok_type] {
if {[regexp "<22>key$type\\d+<2B> <20>(parenblk\\d+)<29>" $tok_text($tok) dummy]} continue
warn "use one space after after '$type'" $tok
if {$config_fix} {
regexp {<7B>(parenblk\d+)<29>} $tok_text($tok) dummy parenblk
regsub "\\s*(<28>parenblk\\d+<2B>)" $tok_text($tok) { \1} tok_text($tok)
set leaf [next_leaf $parenblk]
regsub {^\s*} $tok_text($leaf) "" tok_text($leaf)
reindent $tok
}
}
}
#
# Check for newline after opening and in front of closing braces
#
foreach impl_type {funcimpl constimpl destimpl} {
foreach impl [select_by_type $impl_type] {
if {[regexp {<7B>funcsignature\d+<2B>\n} $tok_text($impl)]} continue
if {[regexp {<7B>funcsignature\d+<2B>[\t ]*<2A>initializer\d+<2B>} $tok_text($impl)]} continue
# tolerate single-line or double-line implementations
set text $tok_text($impl)
regsub {^([\t ]*)<29>m?lcomment\d+<2B>} $text {\1} text
set expanded [expand $text]
if {[regexp -all {\n} $expanded dummy] < 2} continue
if {[calc_merged_line_length $impl] < 79} continue
warn "missing newline after function signature" $impl
if {$config_fix == 0} continue
set indent [gen_indent [indent_level_of [next_leaf $impl]] 0]
regsub {(<28>funcsignature\d+<2B>)\s*} $tok_text($impl) "\\1\n$indent" tok_text($impl)
}
}
#
# Check if the closing brace of an single open brace is single too
#
foreach block_type {block classblock enumbloc} {
foreach block [select_by_type $block_type] {
if {[is_at_begin_of_line $block] == 0} continue
# if close brace follows a newline, we are fine
if {[regexp {\n[\t ]*<2A>closebrace\d+<2B>} $tok_text($block) dummy]} continue
# if block is empty and only one space is used, we are fine
if {[regexp {<7B>openbrace\d+<2B> <20>closebrace\d+<2B>} $tok_text($block) dummy]} continue
if {[regexp {<7B>openbrace\d+<2B>(\s\s|)<29>closebrace\d+<2B>} $tok_text($block) dummy]} {
warn "use one space within empty block" $block
regsub -all {(<28>openbrace\d+<2B>)(\s\s|)(<28>closebrace\d+<2B>)} $tok_text($block) {\1 \2} tok_text($block)
}
warn "put closing brace of block at a new line" $block
if {$config_fix == 0} continue
regsub {\s*(<28>closebrace\d+<2B>)} $tok_text($block) "\n\\1" tok_text($block)
set closebrace [select_by_type closebrace $block 1]
regsub {^} $tok_text($closebrace) "[indent_of $block]" tok_text($closebrace)
}
}
#
# No newlines after free-standing opening braces
#
foreach block_type {block classblock enumblock} {
foreach block [select_by_type $block_type] {
if {[is_at_begin_of_line $block] == 0} continue
if {[regexp {<7B>(openbrace\d+)<29>[\t ]*\n[\ ]*\n} $tok_text($block) dummy openbrace] == 0} {
continue }
warn "superfluous empty line after opening brace" $block
if {$config_fix == 0} continue
regsub {<7B>(openbrace\d+)<29>\s*} $tok_text($block) "<22>\\1<>\n" tok_text($block)
}
}
#
# Check for free-standing closing parenthesis
#
foreach closeparen [select_by_type closeparen] {
if {![is_at_begin_of_line $closeparen]} continue
warn "put closing parenthesis at the end of previous line" $closeparen
remove_newline_in_front_of [next_leaf $closeparen] ""
}
#
# Check for absence of newline at beginning of inherit block
#
foreach inherit [select_by_type inherit] {
if {[regexp {<7B>colon\d+<2B>\s*\n\s*<2A>([^<5E>]+)<29>} $tok_text($inherit) dummy tag] == 0} {
continue }
if {![is_at_begin_of_line $tag]} continue
warn "remove linebreak after colon" $tag
remove_newline_in_front_of [next_leaf $tag] " "
}
#
# Check for superfluous lines
#
foreach tok_type {parenblk argparenblk initializer funcsignature inherit funcdecl funcimpl enum} {
foreach token [select_by_type $tok_type] {
# remove leading comment
set string $tok_text($token)
regsub {^\s*<2A>m?lcomment\d+<2B>\n} $string "" string
# if token contains no newline, there is nothing to optimize
set expanded [expand $string]
set num_newlines [regexp -all {\n} $expanded dummy]
if {$num_newlines == 0} continue
# if token contains an empty line, keep it that way
if {[regexp {\n\n} $expanded dummy]} continue
# check it token fits on a single line
if {[calc_merged_line_length $token] > 79} continue
warn "merge line with following lines" $token
if {$config_fix == 0} continue
while {[regexp {(<28>m?lcomment\d+<2B>\n.*)\n} $tok_text($token)]} {
regsub {(<28>m?lcomment\d+<2B>\n.*)\n} $tok_text($token) {\1} tok_text($token)
}
foreach tok "$token [select_by_type {.*} $token]" {
# if {[is_type $tok mlcomment]} continue
while {[regexp {\n\s*<2A>([^<5E>]+)<29>} $tok_text($tok) dummy sub_tok]} {
# indentation is always located at the leaf
set sub_tok [next_leaf $sub_tok]
regsub {\n\s*(<28>[^<5E>]+<2B>)} $tok_text($tok) " \\1" tok_text($tok)
regsub {^\s*} $tok_text($sub_tok) "" tok_text($sub_tok)
#
# If newline was located at the begin of a parenthesis block, remove the
# space after the open parenthesis
#
regsub {(<28>openparen\d+<2B>)\s*(<28>[^<5E>]+<2B>)} $tok_text($tok) "\\1\\2" tok_text($tok)
}
}
}
}
#
# Comment functions at their declaration, not their implementation
#
msg "Checking location of comments..."
proc check_impl_comment_policy { impl } {
global tok_text
if {[is_within $impl classblock]} return
if {[is_within $impl tplfunc]} return
if {[llength [select_by_type mlcomment $impl]] == 0} return
# check if function has no inline or static modifier
set modifier_tokens [select_by_type modifier $impl]
set is_local 0
foreach modifier_token $modifier_tokens {
set modifier [expand_token $modifier_token]
if {[regexp {\mstatic\M} $modifier dummy]} { set is_local 1 }
if {[regexp {\minline\M} $modifier dummy]} { set is_local 1 }
}
if {[regexp {^\s*<2A>mlcomment\d+<2B>} $tok_text($impl) dummy] == 0} return
set funcname [expand_token [select_by_type identifier [select_by_type funcsignature $impl 1] 1]]
if {!$is_local} {
warn "move description of non-local \"$funcname\" to its declaration" $impl
}
}
proc check_impl_comment_style { impl } {
global tok_text
global config_fix
if {[regexp {^\s*<2A>lcomment} $tok_text($impl) dummy]} {
warn "turn function-header comment into multi-line comment" impl
return
}
# if no function header exists, there is nothing to check
if {[regexp {^\s*<2A>(mlcomment\d+)<29>} $tok_text($impl) dummy mlcomment] == 0} {
return }
if {[regexp {^\s*\/\*\*\n} $tok_text($mlcomment)] == 0} {
warn "function-header comment should start with '/**'" $mlcomment
if {$config_fix} {
regsub {^(\s*)\/\*+\n} $tok_text($mlcomment) "\\1/**\n" tok_text($mlcomment)
}
}
if {[regexp {\n[\t ]*\*\/$} $tok_text($mlcomment)] == 0} {
warn "function-header comment should end with '*/'" $mlcomment
if {$config_fix} {
regsub {(\n[\t ]*)[* ]*\*\/ *$} $tok_text($mlcomment) "\\1\*\/" tok_text($mlcomment)
}
}
if {[regexp {^[^\w]*[a-z]} $tok_text($mlcomment)]} {
warn "brief description should start with upper case" $mlcomment
return
}
}
foreach tok_type {funcimpl constimpl destimpl} {
foreach impl_token [select_by_type $tok_type] {
check_impl_comment_policy $impl_token
check_impl_comment_style $impl_token
}
}
#
# Check function header comments
#
msg "Checking comments..."
proc check_upper_case { string message {token ""} } {
if {[string toupper $string 0 0] == $string} return
warn $message $token
}
proc check_mlcomment { token } {
global tok_text
global tok_line
global config_fix
set string $tok_text($token)
set brief_pattern {\\brief\s+([^\n]+)\n}
if {[regexp $brief_pattern $string dummy brief]} {
check_upper_case $brief "start brief decription with upper case" $token
set brief [string toupper $brief 0 0]
warn "ommit '\\brief' tag for brief description comment, use" $token
warn "/**" $token
warn " * $brief" $token
warn " *" $token
regsub $brief_pattern $string "$brief\n" string
regsub {\/\*\n} $string "/**\n" string
}
set pattern {(/\*[^\\]*\n)([\t ]*\*)\s*\\}
if {[regexp $pattern $string dummy start indent]} {
if {[regexp {[^\*]\n$} $start dummy]} {
warn "brief description of comment should be followed by an empty line" $token
regsub $pattern $string "$start$indent\n$indent \\" string
}
}
if {$config_fix} { set tok_text($token) $string }
}
proc is_at_begin_of_parent { token } {
global tok_text
return [regexp "^\\s*<2A>$token<65>" $tok_text([get_parent $token]) dummy]
}
foreach mlcomment [select_by_type mlcomment] {
# do not look at file header
if {[get_parent $mlcomment] == "content0" && [is_at_begin_of_parent $mlcomment]} continue
check_mlcomment $mlcomment
}
abort