#!/usr/bin/perl # $Id: convert_swig 3762 2014-12-04 19:48:35Z alin $ use FindBin qw($Bin); use strict ; use Getopt::Long; use Pod::Usage; use Pod::Text; use Text::Balanced qw ( extract_bracketed ); use lib "$Bin/pm" ; use File::Basename ; use Cwd 'abs_path' ; use gte ; use get_headers ; use trick_version ; use Digest::MD5 qw(md5_hex) ; ## ## ================================================================================ ## Program: convert_swig ## ## The purpose of convert_swig is to create SWIG interface files for the given ## C/C++ header file (usually S_source.hh) and each of the header files that it ## (recursively) includes. SWIG (Simplified Wrapper and Interface Generator) is ## an interface compiler that connects programs written in C and C++ with scripting ## languagues such as Perl and Python. ## my $usage=" Name: convert_swig - Convert a C/C++ header file into a SWIG interface file. SWIG (Simplified Wrapper and Interface Generator) is an interface compiler that connects programs written in C and C++ with scripting languagues such as Perl and Python. Options: -h, --help display this help and exit -s, --stl Allow convert_swig to process STLs -o, --outfile provide a name for the generated SWIG interface file which is usually denoted with a special .i or .swg suffix Usage: convert_swig [-o ] Examples: % convert_swig -o swig/Ball.i include/Ball.hh % convert_swig -o swig_src/S_source.i S_source.hh " ; my ($in_file , $out_file ) ; my $help = ''; # option variable with default value (false) my $stls = 0; # option variable with default value (false) my ( @include_paths, @include_dirs , @defines) ; my ( @swig_exclude_dirs) ; my %sim ; my %out_of_date ; my ($version, $thread, $year) ; my %icg_no ; my %skipped_files ; my %processed_templates ; my $typedef_def = qr/typedef\s+ # the word typedef (?:[_A-Za-z][\s\w]*\s*) # resolved type (?:[_A-Za-z]\w*) # new type \s*; # semicolon /sx ; my $typedef_enum_def = qr/typedef\s+enum\s* # the words typedef enum (?:\s+[_A-Za-z]\w*)?\s* # optional name {(?:\s*\d+\s*)* # opening brace and possible comments (?:.*?}\s* # everything to closing brace [\w,\s\*]*\s*;) # enum name and ; /sx ; my $typedef_struct = qr/typedef\s+(?:struct|union)\s* # the words typedef struct|union (?:\s+[_A-Za-z]\w*)?\s* # optional name { # opening brace /sx ; my $namespace_def = qr/namespace\s* # keyword namespace (?:\s+[_A-Za-z]\w*) # class name /sx ; my $class_def = qr/(?:class|struct)\s* # keyword class or struct (?:\s+[_A-Za-z]\w*)\s* # class name (?:\{|:(?!\:)) # { or punctuator : /sx ; my $template_def = qr/template\s* # keyword template <[^>]+>\s* # template parameters class\s*[_A-Za-z]\w*\s* # keyword class and class name /sx ; my $template_var_def = qr/(?:\:\:)?[_A-Za-z][:\w]*\s* # template name <[\w\s\*,:<>]*>\s* # template parameters [_A-Za-z]\w*\s*; # var name ; /sx ; # This list is the list of all STL types to ignore. my %all_stl_names = qw(vector 1 list 1 deque 1 set 1 multiset 1 map 1 multimap 1 hash_set 1 hash_multiset 1 hash_map 1 hash_multimap 1 stack 1 queue 1 priority_queue 1 bitset 1 auto_ptr 1 std::vector 1 std::list 1 std::deque 1 std::set 1 std::multiset 1 std::map 1 std::multimap 1 std::hash_set 1 std::hash_multiset 1 std::hash_map 1 std::hash_multimap 1 std::stack 1 std::queue 1 std::priority_queue 1 std::bitset 1 std::auto_ptr 1 pair 1 std::pair 1 std::tr1::shared_ptr 1) ; # This is a partial list of STL types to ignore. We do not ignore vector, map, list if we allow STLs my %stl_names = qw(deque 1 set 1 multiset 1 multimap 1 hash_set 1 hash_multiset 1 hash_map 1 hash_multimap 1 stack 1 queue 1 priority_queue 1 bitset 1 auto_ptr 1 std::deque 1 std::set 1 std::multiset 1 std::multimap 1 std::hash_set 1 std::hash_multiset 1 std::hash_map 1 std::hash_multimap 1 std::stack 1 std::queue 1 std::priority_queue 1 std::bitset 1 std::auto_ptr 1 pair 1 std::pair 1 std::tr1::shared_ptr 1) ; Getopt::Long::Configure ("bundling"); GetOptions ( "outfile|o=s" => \$out_file , "stl|s" => sub { $stls = 1 } , 'help|h' => \$help ) or usage() ; if ( $help ) { usage() ; } ($version, $thread) = get_trick_version() ; ($year) = $version =~ /^(\d+)/ ; @include_paths = $ENV{"TRICK_CFLAGS"} =~ /(-I\s*\S+)/g ; # get include paths from TRICK_CFLAGS @include_dirs = $ENV{"TRICK_CFLAGS"} =~ /-I\s*(\S+)/g ; # get include paths from TRICK_CFLAGS @swig_exclude_dirs = split /:/ , $ENV{"TRICK_SWIG_EXCLUDE"} ; if ( scalar @swig_exclude_dirs == 0 ) { @swig_exclude_dirs = split /:/ , $ENV{"TRICK_ICG_EXCLUDE"} ; } push @include_paths , ("-I".$ENV{"TRICK_HOME"}."/trick_source" , "-I../include") ; @defines = $ENV{"TRICK_CFLAGS"} =~ /(-D\S+)/g ; # get defines from TRICK_CFLAGS if ( $ENV{"TRICK_CFLAGS"} !~ /DTRICK_VER=/ ) { push @defines , "-DTRICK_VER=$year" ; } #(my $cc = gte("TRICK_CC")) =~ s/\n// ; # Use the c preporcessor directly my $cc ; my $system_type = `uname -s` ; chomp $system_type ; if ( $system_type eq "Darwin" ) { $cc = "/usr/bin/llvm-gcc -E" ; } else { $cc = "/usr/bin/cpp" ; } ($in_file) = (grep !/^-/ , @ARGV) ; ## The list of header files to be processed is usually produced by the script ## make_swig_makefile.pm, as it's creating Makefile_swig. This list is stored in ## the file ".S_library_swig". So, if .S_library_swig exists, we can just open and read it. ## my ($s_library_swig) = ".S_library_swig" ; if ( -e $s_library_swig ) { open FILE_LIST, $s_library_swig ; while ( ) { chomp ; $sim{final_all_includes}{$_} = 1 ; } } else { ## Otherwise we need to process S_source.hh to produce the list of header files. ## ## Specifically, we want to generate SWIG interfaces for those header files that are: ## 1) actual dependencies of S_source.hh, GIVEN THE CURRENT environment and ## 2) not excluded from ICG processing ( by ICG_NO or ICG_EXCLUDE). ## ## The header files that are actually included are the dependencies we care ## about. Keep in mind that the pre-processor and the current ENVIRONMENT ## may cause some headers to be conditionally included or excluded. We only ## want to generate SWIG interfaces for headers that are ACTUALLY included. ## ## Whereas the pre-processor can (using the gcc -MM option) generate a list ## of dependencies that satisfy 1) (above), it can't handle that ICG exclusions. ## And, whereas the function get_headers() can generate a list of dependences ## which are flagged if they contain ICG_NO, it doesn't handle conditional includes. ## ## So, the strategy that we employ is to generate and then find the ## intersection of both lists. Then we eliminate those that are in 1) ## $TRICK_HOME/trick_source, or 2) flagged as containing ICG_NO or 3) are ## in ICG_EXCLUDE'd directories. ## ## First, create a list headers using the GCC with the -MM option. GCC will ## handle conditional inclusion. ## # list of files to process does not exist. Process S_source.hh to create one. my ($s_source_full_path) = abs_path("S_source.hh") ; open FILE_LIST, "$cc -MM -DSWIG @include_paths @defines $in_file |" ; my $dir ; $dir = dirname($s_source_full_path) ; while ( ) { next if ( /^#/ or /^\s+\\/ ) ; my $word ; foreach $word ( split ) { next if ( $word eq "\\" or $word =~ /o:/ ) ; if ( $word !~ /^\// and $dir ne "\/" ) { $word = "$dir/$word" ; } $word = abs_path(dirname($word)) . "/" . basename($word) ; # filter out system headers that are missed by the compiler -MM flag next if ( $word =~ /^\/usr\/include/) ; next if ( $word =~ /$ENV{TRICK_HOME}\/trick_source\//) ; #print "gcc found $word\n" ; $sim{gcc_all_includes}{$word} = 1 ; #$sim{mod_date}{$word} = (stat $word)[9] ; } } ## Second, create a list where the files are flagged if they contain ICG_NO. ## get_headers(\%sim, $s_source_full_path) ; if ( open ICGNOFOUND , (".icg_no_found") ) { while ( ) { chomp ; $icg_no{$_} = 1 ; } } ## Then we generate the intersection of the two lists and then eliminate the dependencies that: ## 1) are in $TRICK_HOME/trick_source. ## 2) contain ICG_NO. ## 3) are in ICG_EXCLUDE'd directories. ## to create the final list of header dependencies that we need to convert into SWIG interfaces. ## foreach my $k ( keys %{$sim{gcc_all_includes}} ) { $sim{final_all_includes}{$k} = 1 if exists $sim{all_includes}{$s_source_full_path} ; } foreach my $f ( keys %{$sim{final_all_includes}} ) { if ( $f =~ /$ENV{TRICK_HOME}\/trick_source/ ) { delete $sim{final_all_includes}{$f} ; } elsif ( exists $icg_no{$f} ) { $skipped_files{$f} = "ICG No found" ; delete $sim{final_all_includes}{$f} ; } else { foreach my $ie ( @swig_exclude_dirs ) { # if file location begins with $ie (an IGC exclude dir) if ( $f =~ /^\Q$ie/ ) { $skipped_files{$f} = "ICG exclude dir $ie" ; delete $sim{final_all_includes}{$f} ; last ; # break out of loop } } } } } ## Next we need to determine which of the files do not have up-to-date SWIG files. ## For each header file in final dependency list, if the corresponding SWIG (.i) file ## doesn't exist or the header file is newer than the existing SWIG file, then record ## that a new SWIG file needs needs to be created. The global hash %out_of_date ## represents a list of header files whose corresponding .i files need to be regenerated. ## foreach my $f ( keys %{$sim{final_all_includes}} ) { my ($swig_dir , $base_file , $swig_file ) ; $base_file = basename($f) ; $base_file =~ s/\.[^\.]+$// ; $swig_dir = dirname($f) ; $swig_dir =~ s/\/include$//g ; $swig_dir .= "/swig" ; $swig_file = "$swig_dir/${base_file}.i" ; #print "$swig_file\n" ; if ( -e $swig_file ) { if ( !exists $sim{mod_date}{$f} ) { $sim{mod_date}{$f} = (stat $f)[9] ; } $sim{swig_mod_date}{$f} = (stat $swig_file)[9] ; if ( $sim{mod_date}{$f} > $sim{swig_mod_date}{$f} ) { $out_of_date{$f} = 1 ; } } else { $out_of_date{$f} = 1 ; $sim{swig_mod_date}{$f} = 0 ; } } if ( scalar keys %out_of_date == 0 ) { exit ; } foreach ( sort keys %skipped_files ) { print "convert_swig skipping $_ ($skipped_files{$_})\n" ; } ## Finally, call process_file() to create SWIG interface files for each of the out_of_date headers. ## process_file(\%sim , abs_path($in_file)) ; ## ## ================================================================================ ## process_file ## ## Synopsis ## ## This subroutine processes S_source.h and each of it's requisite header files to ## generate the corresponding SWIG interfaces files. ## ## Parameters ## ## sim_ref ## ## Is this parameter ever used? ## ## in_file ## ## The name of input file, invariably "S_source.hh". ## sub process_file($$) { my ($sim_ref , $in_file) = @_ ; ## Foreach out_of_date header file, generate a SWIG interface file. foreach my $f ( keys %out_of_date ) { my @class_names ; my ($raw_contents , $contents , $new_contents ) ; my ($curr_dir) ; next if ( $f =~ /^$ENV{TRICK_HOME}\/trick_source/ ) ; # clear the processed templates per each file undef %processed_templates ; ## Read in the entire file contents into raw_contents. open IN_FILE, $f ; local $/ ; $raw_contents = ; # remove all comments, they can cause all kinds of trouble # leave the line continuation character if present in a c++ style comment. $raw_contents =~ s/\/\*(?:.*?)\*\/|\/\/(?:.*?)(\\)?(\n)/$1\n/sg ; ## The init_attr functions cause problems when we try and wrap them with SWIG. ## We can safely remove them from the header files. ## Remove the friend init_attr functions that appear in multiline define statements $raw_contents =~ s/\\\n\s*friend\s+void\s+init_attr[^(]+\s*\(\s*\)(\s*);?/$1/sg ; ## Remove the friend init_attr functions outside of multiline define statements $raw_contents =~ s/friend\s+void\s+init_attr[^(]+\s*\(\s*\)(\s*);?/$1/sg ; #$raw_contents =~ s/friend\s+void\s+init_attr[^;]+;//sg ; $raw_contents =~ s/__const/const/sg ; ## For each of the #includes in the out_of_date header file ## create a corresponding %import directive. foreach (split /^/, $raw_contents) { if ( /^(\s*\#\s*include\s+)([^\n]+)/ ) { my ( $include ) = $1 ; my ( $file_name ) = $2 ; if ( $file_name !~ /\$out_file" ; print OUT "\%module m$md5_sum\n\n" ; print OUT "/* $in_file */\n" ; print OUT "#include \"trick_swig/trick_swig.i\"\n\n" ; print OUT " \%{ #include \"$f\" \%}\n" ; print OUT "\n" ; my %class_typemap_printed ; foreach my $c ( @class_names ) { if ( ! exists $class_typemap_printed{$c} ) { my $c_ = $c ; $c_ =~ s/\:/_/g ; print OUT "\%trick_swig_class_typemap($c, $c_)\n" ; $class_typemap_printed{$c} = 1 ; } } print OUT "\n$new_contents" ; print OUT "$contents\n" ; # Add a trick_cast_as macro line for each class parsed in the file. These lines must appear at the bottom of the # file to ensure they are not in a namespace directive and they are after the #define statements they depend on. undef %class_typemap_printed ; foreach my $c ( @class_names ) { if ( ! exists $class_typemap_printed{$c} ) { my $c_ = $c ; $c_ =~ s/\:/_/g ; print OUT "#ifdef TRICK_SWIG_DEFINED_$c_\n" ; print OUT "\%trick_cast_as($c, $c_)\n" ; print OUT "#endif\n" ; $class_typemap_printed{$c} = 1 ; } } close OUT ; print "Writing swig_file $out_file\n" ; } } sub usage() { print "$usage\n" ; exit ; } ## ================================================================================ ## process_contents ## ## Synopsis ## ## Process header file contents for use in the corresponding SWIG interface file. ## ## Parameters ## ## contents_ref ## (IN) reference to header file contents that are to be converted to a SWIG interface. ## ## new_contents_ref ## (OUT) SWIG interface code, derived from the header file contents. ## ## curr_namespace ## (IN) current namespace. ## ## class_names_ref ## (OUT) reference to an array of class and/or struct names encountered when ## processing the header file contents. ## ## Function Dependencies ## ## process_typedef_struct() ## process_namespace() ## process_class() ## sub process_contents($$$$) { my ( $contents_ref , $new_contents_ref , $curr_namespace , $class_names_ref ) = @_ ; while ( $$contents_ref =~ s/^(.*?)(?:($typedef_struct)| ($template_def)| ($namespace_def)| ($class_def))//sx ) { my ( $non_var ) = $1 ; my ( $typedef_struct_string ) = $2 ; my ( $template_string ) = $3 ; my ( $namespace_string ) = $4 ; my ( $class_string ) = $5 ; ## Handle the case of: non_var if ( $non_var ne "" ) { $$new_contents_ref .= $non_var ; } ## ## Handle the case of: typedef_struct ==> typedef (struct | union ) '{' ... ## if ( $typedef_struct_string ne "" ) { process_typedef_struct($typedef_struct_string , $contents_ref, $new_contents_ref, $class_names_ref) ; } ## ## Handle the case of: template_def ==> template '<' '>' class ... ## This is required so that templated classes do not match the plain class definition. ## if ( $template_string ne "" ) { $$new_contents_ref .= $template_string ; process_template( $contents_ref , $new_contents_ref ) ; } ## ## Handle the case of: namespace_def ==> namespace ## if ( $namespace_string ne "" ) { process_namespace( $namespace_string , $contents_ref , $new_contents_ref , $curr_namespace , $class_names_ref ) ; } ## ## Handle the case of: class_def ==> ( class | struct ) ( '{' | ':' ) ## if ( $class_string ne "" ) { process_class( $class_string , $contents_ref , $new_contents_ref , $curr_namespace , $class_names_ref ) ; } } } ## ================================================================================ ## process_template ## ## Synopsis ## ## Process template class definitions. We want to pass through the contents the template ## without processing. We use extract_bracketed to find the whole template definition ## and copy that into new_contents_ref. ## ## Parameters ## ## contents_ref ## (IN) This is a reference to the remainder of the header file (following the ## above string) to be processed. ## ## new_contents_ref ## (OUT) The SWIG code generated so far. ## sub process_template($$) { my ( $contents_ref , $new_contents_ref ) = @_ ; my $extracted ; if ( $$contents_ref =~ s/^(\s*;)//s ) { $$new_contents_ref .= $1 ; } else { # grab all of the text including the opening bracket. $$contents_ref =~ s/^(.*?\s*\{)//s ; $$new_contents_ref .= $1 ; # grab the rest of the template ($extracted, $$contents_ref) = extract_bracketed( "{" . $$contents_ref , "{}") ; # remove added extra opening "brace" $extracted =~ s/^\{// ; $extracted =~ s/const\s+static/static const/g ; $$new_contents_ref .= $extracted ; } } ## ================================================================================ ## process_namespace ## ## Synopsis ## ## Process namespaces found in a header file for use in the corresponding SWIG ## interface file. ## ## Parameters ## ## namespace ## (IN) This is a string of the form: B B>, that was extracted ## from the header file contents. In the contents there should remain the bracketed ## content to which this namespace applies. ## ## contents_ref ## (IN) This is a reference to the remainder of the header file (following the ## above string) to be processed. ## ## new_contents_ref ## (OUT) The SWIG code generated so far. ## ## curr_namespace ## (IN) current namespace. ## ## class_names_ref ## (OUT) reference to an array of class and/or struct names encountered when ## processing the header file contents. ## ## Function Dependencies ## ## extract_bracketed() ## process_contents() ## sub process_namespace($$$$$) { my ( $namespace_string , $contents_ref , $new_contents_ref , $curr_namespace , $class_names_ref ) = @_ ; my $extracted ; my ($namespace_name) ; # Get the name of this namespace and add it to the current namespace $namespace_string =~ /namespace\s+([_A-Za-z]\w*)/sx ; $namespace_name = $curr_namespace . "$1::" ; #print "*** namespace_name = $namespace_name ***\n" ; $$new_contents_ref .= $namespace_string ; # Extract the contents of the namespace ($extracted, $$contents_ref) = extract_bracketed( $$contents_ref , "{}") ; # Process the contents of the namespace process_contents( \$extracted , $new_contents_ref , $namespace_name , $class_names_ref ) ; # Append whatever wasn't matched in process contents to the new file. $$new_contents_ref .= $extracted ; } ## ================================================================================ ## process_class ## ## Synopsis ## Process classes declarations found in a header file for use in the corresponding ## SWIG interface file. ## ## Parameters ## ## class_string ## (IN) This is a string of the form: ( class | struct ) ( '{' | ':' ) ## ## contents_ref ## (IN) This is a reference to the remainder of the header file (following the ## class_string) to be processed. ## ## new_contents_ref ## (OUT) The SWIG code generated so far. ## ## curr_namespace ## (IN) current namespace. ## ## class_names_ref ## (OUT) reference to an array of class and/or struct names encountered when ## processing the header file contents. ## ## sub process_class($$$$$) { my ( $class_string , $contents_ref , $new_contents_ref , $curr_namespace , $class_names_ref ) = @_ ; my $extracted ; my ($class_name) ; my $template_typedefs ; ## Extract the class_name from the class_string $class_string =~ /^(?:class|struct)\s+ # keyword class or struct ([_A-Za-z]\w*) # class name \s*[\{\:]$ /sx or die "Internal error" ; $class_name = $1 ; $$new_contents_ref .= $class_string ; if ( $class_string !~ /\{$/ ) { $$contents_ref =~ s/^(.*?\s*\{)//s ; $$new_contents_ref .= $1 ; } ($extracted, $$contents_ref) = extract_bracketed( "{" . $$contents_ref , "{}") ; # remove the trailing semicolon because we may append text to the class. $$contents_ref =~ s/^\s*;//s ; #remove added extra opening "brace" $extracted =~ s/^\{// ; #print "*** extracted = $extracted ***\n" ; #print "*** contents = $$contents_ref ***\n" ; # SWIG doesn't like "const static". Change it to "static const" $extracted =~ s/const\s+static/static const/g ; # templated variables need to be declared with the SWIG %template directive. # This loop looks for any templated variables and creates the %template lines. while ( $extracted =~ s/^(.*?)(?:($template_var_def))//sx ) { my ( $non_var ) = $1 ; my ( $template_var_def_str ) = $2 ; if ( $non_var ne "" ) { #print "*** non_var = $non_var ***\n" ; $$new_contents_ref .= $non_var ; } if ( $template_var_def_str ne "" ) { #print "*** template_var = $template_var_def_str ***\n" ; $template_var_def_str =~ /(.*?)([_A-Za-z]\w*)\s*;/s ; my ($template_full_type) = $1 ; my ($var_name) = $2 ; #print "*** var_name = $var_name ***\n" ; $$new_contents_ref .= $template_var_def_str ; $template_full_type =~ /([_A-Za-z][:\w]*)\s*] {" OR "typedef union [] {" ## ## contents_ref ## (IN) This is a reference to the remainder of the header file (following the ## above string) to be processed. ## ## new_contents_ref ## (OUT) The SWIG code generated so far. ## ## class_names_ref ## (OUT) reference to an array of class and/or struct names encountered when ## processing the header file contents. ## sub process_typedef_struct($$$$) { my ($typedef_struct_string , $contents_ref, $new_contents_ref , $class_names_ref) = @_ ; my $extracted ; my ($begin, $tail , $struct_names, @struct_names) ; #print "*** typedef_struct_string = $typedef_struct_string ***\n" ; $$new_contents_ref .= $typedef_struct_string ; $typedef_struct_string =~ s/((?:typedef\s+)?(struct|union)\s* # the words typedef struct|union ([_A-Za-z]\w*)?\s* # optional name {)//sx ; $begin = $3 ; ($extracted, $$contents_ref) = extract_bracketed( "{" . $$contents_ref , "{}") ; #print "*** extracted = $extracted ***\n" ; #remove added extra opening "brace" $extracted =~ s/{// ; $$contents_ref =~ s/^(\s*([\w,\s\*]+)?\s*;)//sx ; $tail = $1 ; $struct_names = $2 ; $struct_names =~ s/\s//g ; @struct_names = split /,/ , $struct_names ; if ( $begin ne "" ) { push @$class_names_ref , $begin ; } foreach my $s ( @struct_names ) { if ( $s !~ /\*/ ) { push @$class_names_ref , $s ; } } $$new_contents_ref .= $extracted . $tail ; } __END__