#! /usr/bin/env python3

"""Extract configuration items into various configuration headers.

This uses the configitems file, a database consisting of text lines with the
following single-tab-separated fields:
 - Name of the configuration item, e.g. PQXX_HAVE_PTRDIFF_T.
 - Publication marker: public or internal.
 - A single environmental factor determining the item, e.g. libpq or compiler.
"""

from __future__ import (
    absolute_import,
    print_function,
    unicode_literals,
    )

from argparse import ArgumentParser
import codecs
from errno import ENOENT
import os.path
from os import getcwd
import re
from sys import (
    getdefaultencoding,
    getfilesystemencoding,
    stdout,
    )

__metaclass__ = type


# Matches a preprocessor `#define` directive, capturing the macro name.
# This must be a raw string: `\s` is not a valid escape in a normal literal.
MACRO_DEFINITION = re.compile(r'\s*#\s*define\s+([^\s]+)')


def guess_fs_encoding():
    """Try to establish the filesystem encoding.

    It's a sad thing: some guesswork is involved.  The encoding often seems to
    be conservatively, and incorrectly, set to ascii.

    :return: The first candidate encoding that is not some spelling of ASCII.
    """
    candidates = [
        getfilesystemencoding(),
        getdefaultencoding(),
        'utf-8',
        ]
    for encoding in candidates:
        lower = encoding.lower()
        if lower != 'ascii' and lower != 'ansi_x3.4-1968':
            return encoding
    # The final candidate, utf-8, always passes the check above.
    raise AssertionError("unreachable code reached.")


def guess_output_encoding():
    """Return the encoding of standard output."""
    # Apparently builds in Docker containers may have None as an encoding.
    # Fall back to ASCII.  If this ever happens in a non-ASCII path, well,
    # there may be a more difficult decision to be made.  We'll burn that
    # bridge when we get to it, as they almost say.
    return stdout.encoding or 'ascii'


def decode_path(path):
    """Decode a path element from bytes to unicode string."""
    return path.decode(guess_fs_encoding())


def encode_path(path):
    """Encode a path element from unicode string to bytes."""
    # Nasty detail: a path that was decoded from undecodable bytes carries
    # lone surrogates (see PEP 383).  Those break in encoding, unless you use
    # this special error handler, which turns them back into the raw bytes.
    return path.encode(guess_fs_encoding(), 'surrogateescape')


def printable_path(path, encoding=None):
    """Return a text version of `path` that is safe to print.

    Characters which the output encoding cannot represent are replaced with
    question marks.  The result is text, not bytes, so that it formats into
    messages without a `b'...'` wrapper.

    :param path: Path, as a unicode string.
    :param encoding: Output encoding.  Defaults to the encoding of standard
        output, as guessed by `guess_output_encoding`.
    :return: Unicode string containing only characters that `encoding` can
        represent.
    """
    if encoding is None:
        encoding = guess_output_encoding()
    return path.encode(encoding, 'replace').decode(encoding)


def read_text_file(path, encoding='utf-8'):
    """Read text file, return as string, or `None` if file is not there.

    :param path: Path to the file, as a unicode string.
    :param encoding: Text encoding of the file.
    :return: The file's full contents, or `None` if there is no such file.
    :raise IOError: If opening or reading fails for any reason other than the
        file not existing.
    """
    assert isinstance(path, type(''))
    try:
        with codecs.open(encode_path(path), encoding=encoding) as stream:
            return stream.read()
    except IOError as error:
        if error.errno == ENOENT:
            return None
        raise


def read_lines(path, encoding='utf-8'):
    """Read text file, return as list of lines.

    :param path: Path to the file, as a unicode string.
    :param encoding: Text encoding of the file.
    :return: List of lines, each still carrying its line ending.
    """
    assert isinstance(path, type(''))
    with codecs.open(encode_path(path), encoding=encoding) as stream:
        return list(stream)


def parse_configitems(lines):
    """Split configitems text lines into their fields.

    Blank lines are skipped, since they would otherwise produce an empty
    item without a name, publication, or factor.

    :param lines: Sequence of text lines from the configitems file.
    :return: List of items, each a list of whitespace-separated fields.
    """
    return [line.split() for line in lines if line.strip()]


def read_configitems(filename):
    """Read the configuration-items database.

    :param filename: Path to the configitems file.
    :return: List of config items, each a list of the fields on one line of
        the configitems file: name, publication, factor.
    """
    return parse_configitems(read_lines(filename))


def map_configitems(items):
    """Map each config item to publication/factor.

    :param items: Sequence of config items: (name, publication, factor).
    :return: Dict mapping each item name to a tuple (publication, factor).
    """
    return {
        item: (publication, factor)
        for item, publication, factor in items
        }


def read_header(source_tree, filename):
    """Read the original config.h generated by autoconf.

    :param source_tree: Path to libpqxx source tree.
    :param filename: Path to the config.h file.
    :return: Sequence of text lines from config.h.
    """
    assert isinstance(source_tree, type(''))
    assert isinstance(filename, type(''))
    return read_lines(os.path.join(source_tree, filename))


def extract_macro_name(config_line):
    """Extract a cpp macro name from a configuration line.

    :param config_line: Text line from config.h which may define a macro.
    :return: Name of macro defined in `config_line` if it is a `#define`
        statement, or None.
    """
    match = MACRO_DEFINITION.match(config_line.strip())
    if match is None:
        return None
    return match.group(1)


def extract_section(header_lines, items, publication, factor):
    """Extract config items for given publication/factor from header lines.

    :param header_lines: Sequence of header lines from config.h.
    :param items: Dict mapping macro names to (publication, factor).
    :param publication: Extract only macros for this publication tag.
    :param factor: Extract only macros for this environmental factor.
    :return: Sorted list of `#define` lines from `header_lines` insofar they
        fall within the requested section.
    """
    wanted = (publication, factor)
    # A line that is not a `#define` has macro name None, which is never a
    # key in `items`, so the lookup quietly filters it out.
    return sorted(
        line.strip()
        for line in header_lines
        if items.get(extract_macro_name(line)) == wanted
        )


def compose_header(lines, publication, factor):
    """Generate header text containing given lines.

    :param lines: Sequence of `#define` lines to put in the header.
    :param publication: Publication tag, mentioned in the intro comment.
    :param factor: Environmental factor, mentioned in the intro comment.
    :return: Header text: an intro comment, a blank line, then `lines`, ending
        in a newline.
    """
    intro = (
        "/* Automatically generated from config.h: %s/%s config. */"
        % (publication, factor)
        )
    return '\n'.join([intro, ''] + list(lines) + [''])


def generate_config(source_tree, header_lines, items, publication, factor):
    """Generate config file for a given section, if appropriate.

    Writes nothing if the configuration file ends up identical to one that's
    already there.

    :param source_tree: Location of the libpqxx source tree.
    :param header_lines: Sequence of header lines from config.h.
    :param items: Dict mapping macro names to (publication, factor).
    :param publication: Extract only macros for this publication tag.
    :param factor: Extract only macros for this environmental factor.
    """
    assert isinstance(source_tree, type(''))
    config_file = os.path.join(
        source_tree, 'include', 'pqxx',
        'config-%s-%s.h' % (publication, factor))
    # Only for progress messages.  This must be text rather than bytes, or
    # Python 3 would print it as "b'...'".
    display_path = printable_path(config_file)
    section = extract_section(header_lines, items, publication, factor)
    contents = compose_header(section, publication, factor)
    if read_text_file(config_file) == contents:
        print("Generating %s: no changes--skipping." % display_path)
        return

    print("Generating %s: %d item(s)." % (display_path, len(section)))
    path = encode_path(config_file)
    with codecs.open(path, 'wb', encoding='ascii') as header:
        header.write(contents)


def parse_args(argv=None):
    """Parse command-line arguments.

    :param argv: Optional list of argument strings to parse.  Defaults to the
        process's own command line.
    :return: Parsed arguments, with the source tree location in `sourcetree`.
    """
    default_source_tree = os.path.dirname(
        os.path.dirname(os.path.normpath(os.path.abspath(__file__))))
    parser = ArgumentParser(description=__doc__)
    # A positional argument is mandatory, and its default ignored, unless it
    # has nargs='?'.
    parser.add_argument(
        'sourcetree', metavar='PATH', nargs='?', default=default_source_tree,
        help="Location of libpqxx source tree. Defaults to '%(default)s'.")
    return parser.parse_args(argv)


def check_args(args):
    """Validate command-line arguments.

    :param args: Parsed arguments as returned by `parse_args`.
    :raise Exception: If the source tree location is not a directory.
    """
    if not os.path.isdir(args.sourcetree):
        raise Exception("Not a directory: '%s'." % args.sourcetree)


def get_current_dir():
    """Return the current working directory as a unicode string."""
    cwd = getcwd()
    if isinstance(cwd, bytes):
        return decode_path(cwd)
    return cwd


def main():
    """Main program entry point."""
    args = parse_args()
    check_args(args)
    # The configitems file is under revision control; it's in sourcetree.
    items = read_configitems(os.path.join(args.sourcetree, 'configitems'))
    publications = sorted({item[1] for item in items})
    factors = sorted({item[2] for item in items})
    # The config.h header is generated; it's in the build tree, which should
    # be where we are.
    directory = get_current_dir()
    original_header = read_header(
        directory, os.path.join('include', 'pqxx', 'config.h'))
    items_map = map_configitems(items)

    for publication in publications:
        for factor in factors:
            generate_config(
                directory, original_header, items_map, publication, factor)


if __name__ == '__main__':
    main()