/* PURPOSE: (Monte Carlo simulation.) */ #ifndef MONTECARLO_HH #define MONTECARLO_HH #include #include #include "MonteVar.hh" #include "sim_services/Executive/include/Executive.hh" #include "sim_services/include/RemoteShell.hh" #include "trick_utils/comm/include/tc.h" #ifndef HOST_NAME_MAX #define HOST_NAME_MAX 128 #endif namespace Trick { /** * Represents a particular iteration in a Monte Carlo simulation. In addition to some bookkeeping information, a run * contains the variable values specific to this iteration. * * @author Alex Lin * @author Donna Panter * @author Derek Bankieris * * @date August 2010 */ class MonteRun { public: /** Details the manner in which this run exited. */ enum ExitStatus { INCOMPLETE, /**< not completed */ COMPLETE, /**< completed with no errors */ CORED, /**< core dumped */ TIMEDOUT, /**< timed out */ NO_PERM, /**< could not write output files */ BAD_INPUT, /**< problem parsing monte carlo input */ UNKNOWN /**< unrecognized return code */ }; /** Unique identifier sequentially assigned, starting at zero, by the master. */ unsigned int id; /**< \n trick_units(--) */ /** Number of times this run has been dispatched. */ unsigned int num_tries; /**< \n trick_units(--) */ /** Time at which this run began. */ double start_time; /**< \n trick_units(--) */ /** Time at which this run ended. */ double end_time; /**< \n trick_units(--) */ /** Variable values specific to this Monte Carlo iteration. */ std::vector variables; /**< \n trick_units(--) */ /** Manner in which this run exited. */ ExitStatus exit_status; /**< \n trick_units(--) */ /** * Constructs a MonteRun with the specified id. * * @param id unique indentifier */ MonteRun(unsigned int in_id) : id(in_id), num_tries(0), start_time(0), end_time(0), exit_status(INCOMPLETE) {} }; /** * Represents a slave in a Monte Carlo simulation. * * @see @ref MonteCarloSlaves "Slaves" * * @author Alex Lin * @author Donna Panter * @author Derek Bankieris * * @date August 2010 */ class MonteSlave { public: /** Operational state. */ enum State { UNINITIALIZED, /**< newly created */ INITIALIZING, /**< starting up */ READY, /**< awaiting new run */ RUNNING, /**< processing a run */ STOPPING, /**< stopping after current run */ STOPPED, /**< not accepting new runs */ FINISHED, /**< completed all runs */ UNRESPONSIVE_RUNNING, /**< timed out and in a running state */ UNRESPONSIVE_STOPPING, /**< timed out and in a stopping state */ DISCONNECTED /**< lost connection */ }; /** Master-to-slave commands. */ enum Command { PROCESS_RUN, /**< process a new run */ SHUTDOWN, /**< kill any executing run, call shutdown jobs, and shutdown cleanly */ DIE /**< kill any executing run, do not call shutdown jobs, and exit */ }; /** Unique identifier assigned by the master. */ unsigned int id; /**< \n trick_units(--) */ /** Operational state. */ State state; /**< \n trick_units(--) */ /** Name of the machine on which this slave is running. */ std::string machine_name; /**< \n trick_units(--) */ /** Port over which this slave is listening for dispatches. */ unsigned int port; /**< \n trick_units(--) */ /** Run most recently dispatched to this slave. */ MonteRun *current_run; /**< \n trick_units(--) */ /** Number of runs dispatched to this slave. */ unsigned int num_dispatches; /**< \n trick_units(--) */ /** Number of results returned by this slave. */ unsigned int num_results; /**< \n trick_units(--) */ /** Total cpu time used. */ double cpu_time; /**< \n trick_units(--) */ /** Shell command with which to start this slave. */ Trick::RemoteShell remote_shell; /**< \n trick_units(--) */ /** * User defined shell command with which to start this slave when the #remote_shell is * Trick::TRICK_USER_REMOTE_SHELL. */ std::string user_remote_shell; /**< \n trick_units(--) */ /** Optional arguments to use with the remote shell command. */ std::string remote_shell_args; /**< \n trick_units(--) */ /** Multiplier speed of this slave's machine. */ double multiplier; /**< \n trick_units(--) */ /** Remote program directory pathname. */ std::string sim_path; /**< \n trick_units(--) */ /** Remote program name. */ std::string S_main_name; /**< \n trick_units(--) */ void set_S_main_name(std::string name); /**< \n trick_units(--) */ /** * Constructs a MonteSlave with the specified machine name. * * @param name the slave's machine's name */ MonteSlave(std::string name = "localhost") : id(0), state(UNINITIALIZED), port(0), current_run(NULL), num_dispatches(0), num_results(0), cpu_time(0), remote_shell(Trick::TRICK_SSH), multiplier(1) { if (name.empty()) { machine_name = "localhost"; } else { machine_name = name; } } }; /** * Specifies a range of valid run numbers. * * @see MonteRun::id * * @author Alex Lin * @author Donna Panter * @author Derek Bankieris * * @date August 2010 */ class MonteRange { protected: /** Starting run number. */ unsigned int start; /**< \n trick_units(--) */ /** Ending run number. */ unsigned int end; /**< \n trick_units(--) */ public: /** * Constructs a MonteRange with the specified inclusive end points. Specifying an end value that is less than the * start value results in a range which includes only the start value. * * @param start starting run number * @param end ending run number */ MonteRange(unsigned int in_start, unsigned int in_end) : start(in_start), end(in_end) { if (end < start) { this->end = start; } } /** Gets #start. */ unsigned int get_start() { return start; } /** Gets #end. */ unsigned int get_end() { return end; } }; /** * Represents a Monte Carlo simulation. * * @see @ref MonteCarloPage "Monte Carlo" * * @author Alex Lin * @author Donna Panter * @author Derek Bankieris * * @date August 2010 */ class MonteCarlo : public Trick::Scheduler { friend class InputProcessor; #ifndef SWIG friend void init_attrTrick__MonteCarlo(); #endif public: /** Verbosity of message reporting. */ enum Verbosity { NONE, /**< report no messages */ ERROR, /**< report error messages */ INFORMATIONAL, /**< report error and informational messages, no warning messages */ ALL /**< report all messages (error, informational & warning) */ }; private: int run_queue(Trick::ScheduledJobQueue* queue, std::string in_string) ; int open_file(std::string file_name, FILE** file_ptr) ; void write_to_run_files(std::string file_name) ; int initialize_sockets() ; int construct_run_directory() ; void shutdown_slaves() ; void print_statistics(FILE** fp) ; void dryrun() ; void initialize_slave(Trick::MonteSlave* slave_to_init) ; void default_slave_dispatch_pre_text(Trick::MonteSlave*, std::string &buffer) ; protected: /** Indicates whether or not this is a Monte Carlo simulation. */ bool enabled; /**< \n trick_units(--) */ /** * Indicates whether or not this is a dry run. A dry run executes pre run jobs only. Post run jobs and the runs * themselves are not executed. */ bool dry_run; /**< \n trick_units(--) */ /** * Indicates whether or not the localhost should be treated as a remote machine. This determines if slaves running * locally use remote shells. */ bool localhost_as_remote; /**< \n trick_units(--) */ /** * Indicates how much automation should be employed in forming the commands used to dispatch slaves. * - A value of true indicates that Trick should form only the core of the command, which consists of * S_main executable with the proper slave-specific arguments. #custom_pre_text and #custom_post_text will then be * prepended and appended to the core, respectively, before executing the command. #remote_shell_args, * #user_cmd_string, MonteSlave::remote_shell, and MonteSlave::machine_name are ignored. * - A value of false indicates that Trick should use the shell specified by MonteSlave::remote_shell * with the #remote_shell_args, followed by the slave's machine name, #user_cmd_string, navigation to the correct * directory, and finally the core command described above. #custom_pre_text and #custom_post_text are ignored. */ bool custom_slave_dispatch; /**< \n trick_units(--) */ /** Maximum time to wait for a run to complete. Defaults to 120 seconds. */ double timeout; /**< \n trick_units(s) */ /** Maximum number of times that a run may be dispatched. Defaults to two. Specify zero for no limit. */ unsigned int max_tries; /**< \n trick_units(--) */ /** Options to be passed to the remote shell when spawning new slaves. */ std::string user_cmd_string; /**< \n trick_units(--) */ /** Text to be prepended to the core slave dispatch command as described in #custom_slave_dispatch. */ std::string custom_pre_text; /**< \n trick_units(--) */ /** Text to be appended to the core slave dispatch command as described in #custom_slave_dispatch. */ std::string custom_post_text; /**< \n trick_units(--) */ /** Highest level of messages to report. */ Verbosity verbosity; /**< \n trick_units(--) */ /** Default to false and randomly find port numbers. True, use the user provided port numbers. */ bool default_port_flag; /**< \n trick_units(--) */ /** Device over which connections are accepted. */ TCDevice listen_device; /**< \n trick_units(--) */ /** Device over which data is sent and received. */ TCDevice connection_device; /**< \n trick_units(--) */ /** Device over which connections are accepted between the Slave child and Master. */ TCDevice data_listen_device; /**< \n trick_units(--) */ /** Device over which data is sent and received between Slave child and Master. */ TCDevice data_connection_device; /**< \n trick_units(--) */ /** Runs to be dispatched. */ std::deque runs; /**< \n trick_units(--) */ /** Failed runs. */ std::deque failed_runs; /**< \n trick_units(--) */ /** Valid ranges. */ std::vector run_ranges; /**< \n trick_units(--) */ /** Variables. */ std::vector variables; /**< \n trick_units(--) */ /** Slaves. */ std::vector slaves; /**< \n trick_units(--) */ /** Number of slaves. Exists for Variable Server access. */ int num_slaves; /**< \n trick_units(--) */ /** List of slave pointers. Exists for Variable Server access. */ Trick::MonteSlave **slaves_head; /**< \n trick_units(--) */ /** Current run dispatched. */ unsigned int current_run; /**< \n trick_units(--) */ /** User-specified number of runs. */ unsigned int num_runs; /**< \n trick_units(--) */ /** Number of runs in range. */ unsigned int actual_num_runs; /**< \n trick_units(--) */ /** Number of results. */ unsigned int num_results; /**< \n trick_units(--) */ /** Time simulation began. */ double start_time; /**< \n trick_units(--) */ /** Time simulation ended. */ double end_time; /**< \n trick_units(--) */ /** Port on which the master is listening. This value is unspecified for the master. */ unsigned int master_port; /**< \n trick_units(--) */ /** Port on which the master is listening for data. This value is unspecified for the master. */ unsigned int data_port; /**< \n trick_units(--) */ /** Unique identifier. This value is zero for the master. */ unsigned int slave_id; /**< \n trick_units(--) */ /** Name of the machine on which this simulation is running. */ std::string machine_name; /**< \n trick_units(--) */ /** Run data file. */ FILE *run_data_file; /**< \n trick_io(**) */ /** Run header file. */ FILE *run_header_file; /**< \n trick_io(**) */ /** Run directory. */ std::string run_directory; /**, \n trick_units(--) */ Trick::JobData * curr_job ; /**< trick_io(**) */ /** Return code to be returned by Executive:init(), Executive::loop(), and Executive::shutdown() \n */ int except_return ; /**< trick_io(**) */ /** File name of exceptions caught in init() and loop()\n */ std::string except_file ; /**< trick_io(**) */ /** Error message of exceptions caught in init() and loop()\n */ std::string except_message ; /**< trick_io(**) */ /** Jobs to be run by the master during initialization. */ Trick::ScheduledJobQueue master_init_queue; /**< \n trick_units(--) */ /** Jobs to be run by the master before each run. */ Trick::ScheduledJobQueue master_pre_queue; /**< \n trick_units(--) */ /** Jobs to the run by the master after each run. */ Trick::ScheduledJobQueue master_post_queue; /**< \n trick_units(--) */ /** Jobs to be run by the master during shutdown. */ Trick::ScheduledJobQueue master_shutdown_queue; /**< \n trick_units(--) */ /** Jobs to be run by the slave during initialization. */ Trick::ScheduledJobQueue slave_init_queue; /**< \n trick_units(--) */ /** Jobs to be run by the slave before each run. */ Trick::ScheduledJobQueue slave_pre_queue; /**< \n trick_units(--) */ /** Jobs to the run by the slave after each run. */ Trick::ScheduledJobQueue slave_post_queue; /**< \n trick_units(--) */ /** Jobs to be run by the slave during shutdown. */ Trick::ScheduledJobQueue slave_shutdown_queue; /**< \n trick_units(--) */ public: /** Constructs a new MonteCarlo. */ MonteCarlo(); ~MonteCarlo(); /** * S_define level job. Sends sims through master/slave logic if monte carlo is enabled. * * @return 0 on success */ int execute_monte(); /** * Sets #enabled. * * @see @ref MonteCarloEnabling "Enabling Monte Carlo" */ void set_enabled(bool enabled); /** * Gets #enabled. * * @see @ref MonteCarloEnabling "Enabling Monte Carlo" */ bool get_enabled(); /** * Sets #dry_run. * * @see @ref MonteCarloDryRun "Dry Run" */ void set_dry_run(bool dry_run); /** * Gets #dry_run. * * @see @ref MonteCarloDryRun "Dry Run" */ bool get_dry_run(); /** * Returns true if executive is running as the slave, * based on value of slave_id (which is > 0 for slave). */ bool is_slave(); /** * Returns true if executive is running as the master, * based on the value of slave_id (which is 0 for master). */ bool is_master(); /** * Sets #localhost_as_remote. */ void set_localhost_as_remote(bool localhost_as_remote); /** * Gets #localhost_as_remote. */ bool get_localhost_as_remote(); /** * Sets #custom_slave_dispatch. */ void set_custom_slave_dispatch(bool custom_slave_dispatch); /** * Gets #custom_slave_dispatch. */ bool get_custom_slave_dispatch(); /** * Sets #timeout. */ void set_timeout(double timeout); /** * Gets #timeout. */ double get_timeout(); /** * Sets #max_tries. */ void set_max_tries(unsigned int max_tries); /** * Gets #max_tries. */ unsigned int get_max_tries(); /** * Sets #user_cmd_string. */ void set_user_cmd_string(std::string user_cmd_string); /** * Gets #user_cmd_string. */ std::string get_user_cmd_string(); /** * Sets #custom_pre_text. */ void set_custom_pre_text(std::string custom_pre_text); /** * Gets #custom_pre_text. */ std::string get_custom_pre_text(); /** * Sets #custom_post_text. */ void set_custom_post_text(std::string custom_post_text); /** * Gets #custom_post_text. */ std::string get_custom_post_text(); /** * Sets #verbosity. */ void set_verbosity(Verbosity verbosity); /** * Gets #verbosity. */ Verbosity get_verbosity(); /** * Sets #num_runs. * * @see @ref MonteCarloRuns "Specifying the Number of Runs" */ void set_num_runs(unsigned int num_runs); /** * Gets #num_runs. * * @see @ref MonteCarloRuns "Specifying the Number of Runs" */ unsigned int get_num_runs(); /** * Gets #num_results. */ unsigned int get_num_results(); /** * Gets #slave_id. */ unsigned int get_slave_id(); /** * Adds the specified range to the list of valid ranges. * * @param start the starting run's id * @param end the ending run's id * * @see MonteRun::id * @see MonteRange * @see @ref MonteCarloRanges "Specifying Valid Ranges" */ void add_range(unsigned int start, unsigned int end = 0); /** * Determines if the specified run falls within a valid range. * * @param run the run in question * * @see @ref MonteCarloRanges "Specifying Valid Ranges" */ bool in_range(Trick::MonteRun *run); /** * Copies the current vector of valid run ranges into the argument vector. * * @param ranges the vector into which the ranges will be copied * * @see @ref MonteCarloRanges "Specifying Valid Ranges" */ void get_ranges(std::vector &ranges); /** * Adds the specified variable. * * @param variable the variable to add * * @see @ref MonteCarloVariables "Adding Variables" */ void add_variable(Trick::MonteVar *variable); /** * Adds a new slave with the specified machine name. * * @param machine_name the target machine's name * * @see @ref MonteCarloAddingSlaves "Adding Slaves" */ void add_slave(std::string machine_name); /** * Adds the specified slave. * * @param slave the slave to add * * @see @ref MonteCarloAddingSlaves "Adding Slaves" */ void add_slave(Trick::MonteSlave *slave); /** * Starts the slave with the specified id if it exists. Starting a slave puts it into a state in which it * continuously accepts and processes runs from the master. * * @param id the id of the slave to start * * @see MonteSlave::id */ void start_slave(unsigned int id); /** * Stops the slave with the specified id if it exists. Stopping a slave puts it into a state in which it will not * accept new runs from the master. * * @param id the id of the slave to stop * * @see MonteSlave::id */ void stop_slave(unsigned int id); /** * Disables the slave at initialization of the Master. Must be called before master_init is called * (i.e. in input file, default_data jobs, or initialization jobs with a phase number = 0) * * @param name the name of the slave to disable * @param disabled true if the slave should be disabled, false if the slave should be used. */ void disable_slave(std::string name, bool disabled); /** * Processes command line arguments specific to Monte Carlo simulations. * * @return 0 on success */ int process_sim_args(); /** * Performs a Monte Carlo specific shutdown before calling Executive::shutdown(). * * @return 0 on success */ int shutdown(); /** Gets #current_run being processed * * @return the current run number */ unsigned int get_current_run() ; /** Sets the #current_run being processed * * @param run_num the number to set the run */ void set_current_run(int run_num) ; /** Retrieves the #data_connection_device * * @return the address of the data_connection_device */ TCDevice* get_data_connection_device(); /** Allows the user to set the port number for * the listen_device * * @param port_number number for the port */ void set_listen_device_port(int port_number) ; /** Allows the user to set the port number for * the data_listen_device * * @param port_number number for the port */ void set_data_listen_device_port(int port_number) ; /** Allows the user to set the port number for * the connection_device * * @param port_number number for the port */ void set_connection_device_port(int port_number) ; /** Allows the user to set the port number for * the data_connection_device * * @param port_number number for the port */ void set_data_connection_device_port(int port_number) ; /** Allows the user to get the port number for * the listen_device * * @return the port number */ int get_listen_device_port() ; /** Allows the user to get the port number for * the data_listen_device * * @return the port number */ int get_data_listen_device_port() ; /** Allows the user to get the port number for * the connection_device * * @return the port number */ int get_connection_device_port() ; /** Allows the user to get the port number for * the data_connection_device * * @return the port number */ int get_data_connection_device_port() ; #if 0 /** Overload default implentation of Scheduler::add_sim_object */ virtual int add_sim_object( Trick::SimObject * in_object ) ; #endif protected: /** * Initializes sockets. * * @return 0 on success */ int socket_init(TCDevice *listen_device); /** * Initializes the master. * * @return 0 on success */ int master_init(); /** * Begins Monte Carlo simulation as the master. * * @return 0 on success */ int master(); /** * Spawns all uninitialized slaves. * * @see MonteSlave::state */ void spawn_slaves(); /** Receives from any slaves that are ready to return results. */ void receive_results(); /** Receives the results from the slave */ void receive_slave_results() ; void read_machine_name(MonteSlave *curr_slave); void set_disconnected_state(MonteSlave *curr_slave); void read_slave_port(MonteSlave *curr_slave); /** * Handles the retrying of the current run of the specified slave with the specified exit status. * * @param slave the slave processing the run * @param exit_status the exit status of the run * * @see max_tries */ void handle_retry(MonteSlave *slave, MonteRun::ExitStatus exit_status); /** * Resolves the current run of the specified slave with the specified exit status. * * @param slave the slave processing the run * @param exit_status the exit status of the run */ void resolve_run(MonteSlave *slave, MonteRun::ExitStatus exit_status); /** Checks dispatched runs for timeouts. */ void check_timeouts(); /** * Gets a slave that is ready for a new dispatch. * * @return a ready slave, or NULL if there is none */ MonteSlave *get_ready_slave(); /** * Gets the slave with the specified id. * * @param id the slave's id * * @return the slave, or NULL if no such slave exists * * @see MonteSlave::id */ MonteSlave *get_slave(unsigned int id); /** * Gets the index within #slaves for the slave with the specified id. * * @param id the slave's id * * @return the slave's index, or -1 if no such slave exists * * @see MonteSlave::id */ int get_slave_index(unsigned int id); /** * Gets the next run to be dispatched. * * @return the next run, or NULL if there is none */ MonteRun *get_next_dispatch(); /** * Prepares the specified run for dispatch. * * @param run the run to initialize */ int prepare_run(MonteRun *run); /** Removes the specified run, if present, from #runs. */ void dequeue_run(MonteRun *run); /** * Dispatches the specified run to the specified slave. * * @param run the run to dispatch * @param slave the target slave */ void dispatch_run_to_slave(MonteRun *run, MonteSlave *slave); /** Updates the #num_slaves and #slaves_head to reflect the #slaves. */ void sync_slaves_head(); /** Updates #actual_num_runs. */ void update_actual_num_runs(); /** Shuts down the master. */ void master_shutdown(); /** * Initializes the slave. * * @return 0 on success */ int slave_init(); /** * Begins Monte Carlo simulation as a slave. * * @return 0 on success */ int slave(); /** Processes an incoming run. */ int slave_process_run(); /** Shuts down the slave. */ void slave_shutdown(); /** Kills the slave. */ void slave_die(); /** Kills the current run. */ void slave_kill_run(); int instrument_job_before(Trick::JobData* instrument_job); int instrument_job_after(Trick::JobData* instrument_job); int instrument_job_remove(std::string in_job); int write_s_job_execution(FILE* fp); /** * Determines if the specified stings are equivalent, ignoring case. * * @param string1 the first string * @param string2 the second string * * @return the case-insensitive equivalency */ bool equals_ignore_case(std::string string1, std::string string2); // I am appalled by having to write this myself. }; }; #endif