mirror of
https://github.com/nasa/trick.git
synced 2024-12-19 05:07:54 +00:00
Update all enums in MonteCarlo with prefixes, so as to not conflict with other enums. (#650)
This commit is contained in:
parent
6f82d11122
commit
84f67ef79f
@ -93,23 +93,23 @@ namespace Trick {
|
||||
public:
|
||||
/** Operational state. */
|
||||
enum State {
|
||||
UNINITIALIZED, /**< newly created */
|
||||
INITIALIZING, /**< starting up */
|
||||
READY, /**< awaiting new run */
|
||||
RUNNING, /**< processing a run */
|
||||
STOPPING, /**< stopping after current run */
|
||||
STOPPED, /**< not accepting new runs */
|
||||
FINISHED, /**< completed all runs */
|
||||
UNRESPONSIVE_RUNNING, /**< timed out and in a running state */
|
||||
UNRESPONSIVE_STOPPING, /**< timed out and in a stopping state */
|
||||
DISCONNECTED /**< lost connection */
|
||||
MC_UNINITIALIZED, /**< newly created */
|
||||
MC_INITIALIZING, /**< starting up */
|
||||
MC_READY, /**< awaiting new run */
|
||||
MC_RUNNING, /**< processing a run */
|
||||
MC_STOPPING, /**< stopping after current run */
|
||||
MC_STOPPED, /**< not accepting new runs */
|
||||
MC_FINISHED, /**< completed all runs */
|
||||
MC_UNRESPONSIVE_RUNNING, /**< timed out and in a running state */
|
||||
MC_UNRESPONSIVE_STOPPING, /**< timed out and in a stopping state */
|
||||
MC_DISCONNECTED /**< lost connection */
|
||||
};
|
||||
|
||||
/** Master-to-slave commands. */
|
||||
enum Command {
|
||||
PROCESS_RUN, /**< process a new run */
|
||||
SHUTDOWN, /**< kill any executing run, call shutdown jobs, and shutdown cleanly */
|
||||
DIE /**< kill any executing run, do not call shutdown jobs, and exit */
|
||||
MC_PROCESS_RUN, /**< process a new run */
|
||||
MC_SHUTDOWN, /**< kill any executing run, call shutdown jobs, and shutdown cleanly */
|
||||
MC_DIE /**< kill any executing run, do not call shutdown jobs, and exit */
|
||||
};
|
||||
|
||||
/** Unique identifier assigned by the master. */
|
||||
@ -166,7 +166,7 @@ namespace Trick {
|
||||
*/
|
||||
MonteSlave(std::string name = "localhost") :
|
||||
id(0),
|
||||
state(UNINITIALIZED),
|
||||
state(MC_UNINITIALIZED),
|
||||
port(0),
|
||||
current_run(NULL),
|
||||
num_dispatches(0),
|
||||
@ -248,10 +248,10 @@ namespace Trick {
|
||||
public:
|
||||
/** Verbosity of message reporting. */
|
||||
enum Verbosity {
|
||||
NONE, /**< report no messages */
|
||||
ERROR, /**< report error messages */
|
||||
INFORMATIONAL, /**< report error and informational messages, no warning messages */
|
||||
ALL /**< report all messages (error, informational & warning) */
|
||||
MC_NONE, /**< report no messages */
|
||||
MC_ERROR, /**< report error messages */
|
||||
MC_INFORMATIONAL, /**< report error and informational messages, no warning messages */
|
||||
MC_ALL /**< report all messages (error, informational & warning) */
|
||||
};
|
||||
|
||||
/** Options to be passed to the slave sim. */
|
||||
|
@ -10,7 +10,7 @@ Trick::MonteCarlo::MonteCarlo() :
|
||||
custom_slave_dispatch(false),
|
||||
timeout(120),
|
||||
max_tries(2),
|
||||
verbosity(INFORMATIONAL),
|
||||
verbosity(MC_INFORMATIONAL),
|
||||
num_runs(0),
|
||||
actual_num_runs(0),
|
||||
num_results(0),
|
||||
|
@ -14,7 +14,7 @@ void Trick::MonteCarlo::dispatch_run_to_slave(MonteRun *run, MonteSlave *slave)
|
||||
if (prepare_run(run) == -1) {
|
||||
return;
|
||||
}
|
||||
slave->state = MonteSlave::RUNNING;
|
||||
slave->state = MonteSlave::MC_RUNNING;
|
||||
connection_device.hostname = (char*)slave->machine_name.c_str();
|
||||
connection_device.port = slave->port;
|
||||
if (tc_connect(&connection_device) == TC_SUCCESS) {
|
||||
@ -29,18 +29,18 @@ void Trick::MonteCarlo::dispatch_run_to_slave(MonteRun *run, MonteSlave *slave)
|
||||
buffer_stream << run->id ;
|
||||
buffer += std::string("trick.mc_set_current_run(") + buffer_stream.str() + std::string(")\n");
|
||||
|
||||
if (verbosity >= INFORMATIONAL) {
|
||||
if (verbosity >= MC_INFORMATIONAL) {
|
||||
message_publish(MSG_INFO, "Monte [Master] Dispatching run %d to %s:%d.\n",
|
||||
run->id, slave->machine_name.c_str(), slave->id) ;
|
||||
}
|
||||
|
||||
int command = htonl(MonteSlave::PROCESS_RUN);
|
||||
int command = htonl(MonteSlave::MC_PROCESS_RUN);
|
||||
tc_write(&connection_device, (char *)&command, (int)sizeof(command));
|
||||
int num_bytes = htonl(buffer.length());
|
||||
tc_write(&connection_device, (char*)&num_bytes, (int)sizeof(num_bytes));
|
||||
tc_write(&connection_device, (char*)buffer.c_str(), (int)buffer.length());
|
||||
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Parameterization of run %d :\n%s\n", run->id, buffer.c_str()) ;
|
||||
}
|
||||
|
||||
@ -54,8 +54,8 @@ void Trick::MonteCarlo::dispatch_run_to_slave(MonteRun *run, MonteSlave *slave)
|
||||
run->start_time = time_val.tv_sec + (double)time_val.tv_usec / 1000000;
|
||||
++run->num_tries;
|
||||
} else {
|
||||
slave->state = Trick::MonteSlave::DISCONNECTED;
|
||||
if (verbosity >= ERROR) {
|
||||
slave->state = Trick::MonteSlave::MC_DISCONNECTED;
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Failed to connect to %s:%d to dispatch run.\n",
|
||||
slave->machine_name.c_str(), slave->id) ;
|
||||
}
|
||||
|
@ -201,38 +201,38 @@ void Trick::MonteCarlo::add_slave(Trick::MonteSlave *in_slave) {
|
||||
|
||||
/**
|
||||
* @par Detailed Design:
|
||||
* This function has an effect only if the slave exists and is in the STOPPING, UNRESPONSIVE_STOPPING, or STOPPED state.
|
||||
* This function has an effect only if the slave exists and is in the MC_STOPPING, MC_UNRESPONSIVE_STOPPING, or MC_STOPPED state.
|
||||
*/
|
||||
void Trick::MonteCarlo::start_slave(unsigned int id) {
|
||||
if (MonteSlave *slave = get_slave(id)) {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [Master] Starting %s:%d.\n", slave->machine_name.c_str(), slave->id) ;
|
||||
}
|
||||
if (slave->state == Trick::MonteSlave::STOPPING) {
|
||||
slave->state = Trick::MonteSlave::RUNNING;
|
||||
} else if (slave->state == Trick::MonteSlave::UNRESPONSIVE_STOPPING) {
|
||||
slave->state = Trick::MonteSlave::UNRESPONSIVE_RUNNING;
|
||||
} else if (slave->state == Trick::MonteSlave::STOPPED) {
|
||||
slave->state = Trick::MonteSlave::READY;
|
||||
if (slave->state == Trick::MonteSlave::MC_STOPPING) {
|
||||
slave->state = Trick::MonteSlave::MC_RUNNING;
|
||||
} else if (slave->state == Trick::MonteSlave::MC_UNRESPONSIVE_STOPPING) {
|
||||
slave->state = Trick::MonteSlave::MC_UNRESPONSIVE_RUNNING;
|
||||
} else if (slave->state == Trick::MonteSlave::MC_STOPPED) {
|
||||
slave->state = Trick::MonteSlave::MC_READY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @par Detailed Design:
|
||||
* This function has an effect only if the slave exists and is in the READY, RUNNING, or UNRESPONSIVE_RUNNING state.
|
||||
* This function has an effect only if the slave exists and is in the MC_READY, MC_RUNNING, or MC_UNRESPONSIVE_RUNNING state.
|
||||
*/
|
||||
void Trick::MonteCarlo::stop_slave(unsigned int id) {
|
||||
if (MonteSlave *slave = get_slave(id)) {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [Master] Stopping %s:%d.\n", slave->machine_name.c_str(), slave->id) ;
|
||||
}
|
||||
if (slave->state == Trick::MonteSlave::READY) {
|
||||
slave->state = Trick::MonteSlave::STOPPED;
|
||||
} else if (slave->state == Trick::MonteSlave::RUNNING) {
|
||||
slave->state = Trick::MonteSlave::STOPPING;
|
||||
} else if (slave->state == Trick::MonteSlave::UNRESPONSIVE_RUNNING) {
|
||||
slave->state = Trick::MonteSlave::UNRESPONSIVE_STOPPING;
|
||||
if (slave->state == Trick::MonteSlave::MC_READY) {
|
||||
slave->state = Trick::MonteSlave::MC_STOPPED;
|
||||
} else if (slave->state == Trick::MonteSlave::MC_RUNNING) {
|
||||
slave->state = Trick::MonteSlave::MC_STOPPING;
|
||||
} else if (slave->state == Trick::MonteSlave::MC_UNRESPONSIVE_RUNNING) {
|
||||
slave->state = Trick::MonteSlave::MC_UNRESPONSIVE_STOPPING;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -245,9 +245,9 @@ void Trick::MonteCarlo::disable_slave(std::string name, bool disabled){
|
||||
for (std::vector<MonteSlave *>::size_type i = 0; i < slaves.size(); ++i) {
|
||||
if (equals_ignore_case(slaves[i]->machine_name, name)) {
|
||||
if (disabled) {
|
||||
slaves[i]->state = Trick::MonteSlave::STOPPED;
|
||||
slaves[i]->state = Trick::MonteSlave::MC_STOPPED;
|
||||
} else {
|
||||
slaves[i]->state = Trick::MonteSlave::UNINITIALIZED;
|
||||
slaves[i]->state = Trick::MonteSlave::MC_UNINITIALIZED;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -279,7 +279,7 @@ int Trick::MonteCarlo::shutdown() {
|
||||
connection_device.port = master_port;
|
||||
if (tc_connect(&connection_device) == TC_SUCCESS) {
|
||||
int exit_status = MonteRun::MC_RUN_COMPLETE;
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Sending run exit status to master: %d\n",
|
||||
machine_name.c_str(), slave_id, exit_status) ;
|
||||
}
|
||||
@ -290,7 +290,7 @@ int Trick::MonteCarlo::shutdown() {
|
||||
run_queue(&slave_post_queue, "in slave_post queue");
|
||||
tc_disconnect(&connection_device);
|
||||
} else {
|
||||
if (verbosity >= ERROR)
|
||||
if (verbosity >= MC_ERROR)
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [%s:%d] Failed to connect to master.\n",
|
||||
@ -303,12 +303,12 @@ int Trick::MonteCarlo::shutdown() {
|
||||
void Trick::MonteCarlo::handle_retry(MonteSlave& slave, MonteRun::ExitStatus exit_status) {
|
||||
if (max_tries <= 0 || slave.current_run->num_tries < max_tries) {
|
||||
// Add the run to the retry queue.
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Queueing run %d for retry.\n", slave.current_run->id) ;
|
||||
}
|
||||
runs.push_back(slave.current_run);
|
||||
} else {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Run %d has reached its maximum allowed tries and has been skipped.\n",
|
||||
slave.current_run->id) ;
|
||||
}
|
||||
@ -333,7 +333,7 @@ void Trick::MonteCarlo::resolve_run(MonteSlave& slave, MonteRun::ExitStatus exit
|
||||
|
||||
++num_results;
|
||||
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [Master] Run %d has been resolved as: %d.\n",slave.current_run->id, exit_status) ;
|
||||
}
|
||||
}
|
||||
@ -345,7 +345,7 @@ void Trick::MonteCarlo::check_timeouts() {
|
||||
/** <ul><li> For every slave: */
|
||||
for (std::vector<Trick::MonteSlave *>::size_type i = 0; i < slaves.size(); ++i) {
|
||||
/** <ul><li> If the slave has timed out: */
|
||||
if ((slaves[i]->state == MonteSlave::RUNNING || slaves[i]->state == MonteSlave::STOPPING) &&
|
||||
if ((slaves[i]->state == MonteSlave::MC_RUNNING || slaves[i]->state == MonteSlave::MC_STOPPING) &&
|
||||
(time_val.tv_sec + (double)time_val.tv_usec / 1000000 - slaves[i]->current_run->start_time) *
|
||||
slaves[i]->multiplier > timeout) {
|
||||
/**
|
||||
@ -353,22 +353,22 @@ void Trick::MonteCarlo::check_timeouts() {
|
||||
* data later. Only process this timeout if the run hasn't been resolved yet.
|
||||
*/
|
||||
if (slaves[i]->current_run->exit_status == MonteRun::MC_RUN_INCOMPLETE) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] %s:%d has not responded for run %d.\n",
|
||||
slaves[i]->machine_name.c_str(), slaves[i]->id, slaves[i]->current_run->id) ;
|
||||
}
|
||||
handle_retry(*slaves[i], MonteRun::MC_RUN_TIMED_OUT);
|
||||
}
|
||||
/** </ul><li> Update the slave's state. */
|
||||
slaves[i]->state = slaves[i]->state == MonteSlave::RUNNING ?
|
||||
MonteSlave::UNRESPONSIVE_RUNNING : MonteSlave::UNRESPONSIVE_STOPPING;
|
||||
slaves[i]->state = slaves[i]->state == MonteSlave::MC_RUNNING ?
|
||||
MonteSlave::MC_UNRESPONSIVE_RUNNING : MonteSlave::MC_UNRESPONSIVE_STOPPING;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Trick::MonteSlave * Trick::MonteCarlo::get_ready_slave() {
|
||||
for (std::vector<Trick::MonteSlave>::size_type i = 0; i < slaves.size(); ++i) {
|
||||
if (slaves[i]->state == Trick::MonteSlave::READY) {
|
||||
if (slaves[i]->state == Trick::MonteSlave::MC_READY) {
|
||||
return slaves[i];
|
||||
}
|
||||
}
|
||||
@ -402,7 +402,7 @@ Trick::MonteRun *Trick::MonteCarlo::get_next_dispatch() {
|
||||
return curr_run;
|
||||
/** <li> Otherwise, run the pre run jobs and dequeue it. */
|
||||
} else {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_WARNING, "Monte [Master] Run %d is out of range and has been skipped.\n", curr_run->id) ;
|
||||
}
|
||||
prepare_run(curr_run);
|
||||
@ -422,7 +422,7 @@ int Trick::MonteCarlo::prepare_run(MonteRun *curr_run) {
|
||||
for (std::vector<std::string>::size_type i = 0; i < variables.size(); ++i) {
|
||||
curr_run->variables.push_back(variables[i]->get_next_value());
|
||||
if (curr_run->variables.back() == "EOF") {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_WARNING, "Monte [Master] File variable '%s' reached end-of-file. Reducing number of runs to %d.\n",
|
||||
variables[i]->name.c_str(), curr_run->id) ;
|
||||
}
|
||||
@ -484,7 +484,7 @@ void Trick::MonteCarlo::update_actual_num_runs() {
|
||||
}
|
||||
/** <li> Add one for every currently dispatched run. */
|
||||
for (std::vector<MonteSlave *>::size_type i = 0; i < slaves.size(); ++i) {
|
||||
if (slaves[i]->state == MonteSlave::RUNNING || slaves[i]->state == MonteSlave::STOPPING) {
|
||||
if (slaves[i]->state == MonteSlave::MC_RUNNING || slaves[i]->state == MonteSlave::MC_STOPPING) {
|
||||
++actual_num_runs;
|
||||
}
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ int Trick::MonteCarlo::initialize_sockets() {
|
||||
/** <ul><li> Initialize the sockets for communication with slaves. */
|
||||
int return_value = tc_init(&listen_device);
|
||||
if (return_value != TC_SUCCESS) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Failed to initialize status communication socket.\n") ;
|
||||
}
|
||||
return return_value;
|
||||
@ -26,7 +26,7 @@ int Trick::MonteCarlo::initialize_sockets() {
|
||||
|
||||
/** <li> If no slaves were specified, add one on localhost. */
|
||||
if (slaves.empty()) {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_WARNING, "Monte [Master] No slaves specified. Adding localhost as the sole slave.\n") ;
|
||||
}
|
||||
add_slave(new MonteSlave());
|
||||
|
@ -13,7 +13,7 @@ int Trick::MonteCarlo::construct_run_directory() {
|
||||
if (run_directory.empty()) {
|
||||
std::string run_base_directory = command_line_args_get_output_dir();
|
||||
if (run_base_directory.empty()) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Could not get the output directory.\n") ;
|
||||
}
|
||||
return -1;
|
||||
@ -28,7 +28,7 @@ int Trick::MonteCarlo::construct_run_directory() {
|
||||
|
||||
run_directory = basename((char *)command_line_args_get_output_dir());
|
||||
if (run_directory.empty()) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Could not get the run directory name.\n") ;
|
||||
}
|
||||
return -1;
|
||||
@ -48,7 +48,7 @@ int Trick::MonteCarlo::construct_run_directory() {
|
||||
int Trick::MonteCarlo::open_file(std::string file_name, FILE** file_ptr) {
|
||||
|
||||
if ((*file_ptr = fopen(file_name.c_str(), "w")) == NULL) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Could not open %s for writing.\n", file_name.c_str()) ;
|
||||
}
|
||||
return -1;
|
||||
|
@ -47,16 +47,16 @@ void Trick::MonteCarlo::master_shutdown() {
|
||||
|
||||
void Trick::MonteCarlo::shutdown_slaves() {
|
||||
|
||||
if (verbosity >= INFORMATIONAL) {
|
||||
if (verbosity >= MC_INFORMATIONAL) {
|
||||
message_publish(MSG_INFO, "Monte [Master] Simulation complete. Shutting down slaves.\n\n") ;
|
||||
}
|
||||
|
||||
for (std::vector<MonteSlave *>::size_type i = 0; i < slaves.size() ; ++i) {
|
||||
slaves[i]->state = MonteSlave::FINISHED;
|
||||
slaves[i]->state = MonteSlave::MC_FINISHED;
|
||||
connection_device.hostname = (char*)slaves[i]->machine_name.c_str();
|
||||
connection_device.port = slaves[i]->port;
|
||||
if (tc_connect(&connection_device) == TC_SUCCESS) {
|
||||
int command = htonl(MonteSlave::SHUTDOWN);
|
||||
int command = htonl(MonteSlave::MC_SHUTDOWN);
|
||||
tc_write(&connection_device, (char*)&command, sizeof(command));
|
||||
}
|
||||
}
|
||||
|
@ -27,10 +27,10 @@ void Trick::MonteCarlo::receive_results() {
|
||||
}
|
||||
|
||||
/**
|
||||
* <li> If the slave is in the INITIALIZING state, it is sending us the
|
||||
* <li> If the slave is in the MC_INITIALIZING state, it is sending us the
|
||||
* machine name and port over which it is listening for new runs.
|
||||
*/
|
||||
if (slave->state == MonteSlave::INITIALIZING) {
|
||||
if (slave->state == MonteSlave::MC_INITIALIZING) {
|
||||
handle_initialization(*slave);
|
||||
}
|
||||
/** <li> Otherwise, it's sending us run data. */
|
||||
@ -41,7 +41,7 @@ void Trick::MonteCarlo::receive_results() {
|
||||
}
|
||||
|
||||
void Trick::MonteCarlo::handle_initialization(Trick::MonteSlave& slave) {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(
|
||||
MSG_INFO,
|
||||
"Monte [Master] Receiving initialization information from %s:%d.\n",
|
||||
@ -70,12 +70,12 @@ void Trick::MonteCarlo::handle_initialization(Trick::MonteSlave& slave) {
|
||||
}
|
||||
slave.port = ntohl(slave.port);
|
||||
|
||||
slave.state = MonteSlave::READY;
|
||||
slave.state = MonteSlave::MC_READY;
|
||||
tc_disconnect(&connection_device);
|
||||
}
|
||||
|
||||
void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
if (verbosity >= INFORMATIONAL) {
|
||||
if (verbosity >= MC_INFORMATIONAL) {
|
||||
message_publish(MSG_INFO, "Monte [Master] Receiving results for run %d from %s:%d.\n",
|
||||
slave.current_run->id, slave.machine_name.c_str(), slave.id) ;
|
||||
}
|
||||
@ -97,7 +97,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
// jobs, the master will receive a MC_RUN_COMPLETE status from the slave's
|
||||
// child process and then an error status from the parent, rendering
|
||||
// this message incorrect.
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(
|
||||
MSG_INFO,
|
||||
"Monte [Master] Run %d has already been resolved. Discarding results.\n",
|
||||
@ -124,7 +124,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
break;
|
||||
|
||||
case MonteRun::MC_PROBLEM_PARSING_INPUT:
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported bad input for run %d. Skipping.\n",
|
||||
@ -134,7 +134,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
break;
|
||||
|
||||
case MonteRun::MC_RUN_DUMPED_CORE:
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported core dump for run %d. Skipping.\n",
|
||||
@ -144,7 +144,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
break;
|
||||
|
||||
case MonteRun::MC_CANT_CREATE_OUTPUT_DIR:
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported a failure to create output directories for run %d.\n",
|
||||
@ -160,20 +160,20 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
* then that slave itself reports a timeout. </ul>
|
||||
*/
|
||||
case MonteRun::MC_RUN_TIMED_OUT:
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported a timeout for run %d.\n",
|
||||
slave.machine_name.c_str(), slave.id, slave.current_run->id);
|
||||
}
|
||||
if (slave.state != MonteSlave::UNRESPONSIVE_RUNNING &&
|
||||
slave.state != MonteSlave::UNRESPONSIVE_STOPPING) {
|
||||
if (slave.state != MonteSlave::MC_UNRESPONSIVE_RUNNING &&
|
||||
slave.state != MonteSlave::MC_UNRESPONSIVE_STOPPING) {
|
||||
handle_retry(slave, MonteRun::MC_RUN_TIMED_OUT);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported unrecognized exit status (%d) for run %d. Skipping.\n",
|
||||
@ -186,16 +186,16 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
tc_disconnect(&connection_device);
|
||||
|
||||
/** <li> Update the slave's state. */
|
||||
if (slave.state == MonteSlave::RUNNING || slave.state == MonteSlave::UNRESPONSIVE_RUNNING) {
|
||||
slave.state = MonteSlave::READY;
|
||||
} else if (slave.state == MonteSlave::STOPPING || slave.state == MonteSlave::UNRESPONSIVE_STOPPING) {
|
||||
slave.state = MonteSlave::STOPPED;
|
||||
if (slave.state == MonteSlave::MC_RUNNING || slave.state == MonteSlave::MC_UNRESPONSIVE_RUNNING) {
|
||||
slave.state = MonteSlave::MC_READY;
|
||||
} else if (slave.state == MonteSlave::MC_STOPPING || slave.state == MonteSlave::MC_UNRESPONSIVE_STOPPING) {
|
||||
slave.state = MonteSlave::MC_STOPPED;
|
||||
}
|
||||
}
|
||||
|
||||
void Trick::MonteCarlo::set_disconnected_state(Trick::MonteSlave& slave) {
|
||||
slave.state = Trick::MonteSlave::DISCONNECTED;
|
||||
if (verbosity >= ERROR) {
|
||||
slave.state = Trick::MonteSlave::MC_DISCONNECTED;
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] Lost connection to %s:%d.\n",
|
||||
slave.machine_name.c_str(), slave.id) ;
|
||||
}
|
||||
|
@ -9,13 +9,13 @@ int Trick::MonteCarlo::execute_as_slave() {
|
||||
|
||||
/** <li> Forever: */
|
||||
while (true) {
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Waiting for new run.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
/** <ul><li> On a blocking read, wait for a MonteSlave::Command from the master. */
|
||||
if (tc_accept(&listen_device, &connection_device) != TC_SUCCESS) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Lost connection to Master. Shutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -23,7 +23,7 @@ int Trick::MonteCarlo::execute_as_slave() {
|
||||
}
|
||||
int command;
|
||||
if (tc_read(&connection_device, (char *)&command, (int)sizeof(command)) != (int)sizeof(command)) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Lost connection to Master while receiving instructions. Shutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -31,9 +31,9 @@ int Trick::MonteCarlo::execute_as_slave() {
|
||||
}
|
||||
switch (command = ntohl(command)) {
|
||||
int return_value;
|
||||
case MonteSlave::PROCESS_RUN:
|
||||
case MonteSlave::MC_PROCESS_RUN:
|
||||
/**
|
||||
* <ul><li> MonteSlave::PROCESS_RUN: Call #slave_process_run. This will return a non-zero value when run in a
|
||||
* <ul><li> MonteSlave::MC_PROCESS_RUN: Call #slave_process_run. This will return a non-zero value when run in a
|
||||
* child process to indicate that this function should return so that the sim can complete.
|
||||
*/
|
||||
return_value = slave_process_run();
|
||||
@ -41,17 +41,17 @@ int Trick::MonteCarlo::execute_as_slave() {
|
||||
return return_value;
|
||||
}
|
||||
break;
|
||||
case MonteSlave::SHUTDOWN:
|
||||
/** <li> MonteSlave::SHUTDOWN: Call #slave_shutdown. */
|
||||
if (verbosity >= INFORMATIONAL) {
|
||||
case MonteSlave::MC_SHUTDOWN:
|
||||
/** <li> MonteSlave::MC_SHUTDOWN: Call #slave_shutdown. */
|
||||
if (verbosity >= MC_INFORMATIONAL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Shutdown command received from Master. Shutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
slave_shutdown();
|
||||
break;
|
||||
case MonteSlave::DIE:
|
||||
case MonteSlave::MC_DIE:
|
||||
/** <li> MonteSlave::MC_DIE: Call #slave_die. */
|
||||
if (verbosity >= INFORMATIONAL) {
|
||||
if (verbosity >= MC_INFORMATIONAL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Killed by Master.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -59,7 +59,7 @@ int Trick::MonteCarlo::execute_as_slave() {
|
||||
break;
|
||||
default:
|
||||
/** <li> default: Call #slave_shutdown. */
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Unrecognized command %d received from Master. Shutting down.\n",
|
||||
machine_name.c_str(), slave_id, command) ;
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ int Trick::MonteCarlo::slave_init() {
|
||||
|
||||
if (access(run_directory.c_str(), F_OK) != 0) {
|
||||
if (mkdir(run_directory.c_str(), 0775) == -1) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Unable to create directory %s.\nTerminating.\n",
|
||||
run_directory.c_str(), machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -35,14 +35,14 @@ int Trick::MonteCarlo::slave_init() {
|
||||
/** <li> Connect to the master and write the port over which we are listening for new runs. */
|
||||
connection_device.port = master_port;
|
||||
if (tc_connect(&connection_device) != TC_SUCCESS) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Failed to initialize communication sockets.\nTerminating.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Making initial connection with Master.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
int size;
|
||||
/** <ul><li> Read the length of the incoming message. */
|
||||
if (tc_read(&connection_device, (char *)&size, (int)sizeof(size)) != (int)sizeof(size) || (size = ntohl(size)) < 0) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Lost connection to Master while receiving new run.\nShutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -25,7 +25,7 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
char *input = new char[size + 1];
|
||||
/** <li> Read the incoming message. */
|
||||
if (tc_read(&connection_device, input, size) != size) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Lost connection to Master while receiving new run.\nShutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -40,7 +40,7 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
*/
|
||||
pid_t pid = fork();
|
||||
if (pid == -1) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Unable to fork new process for run.\nShutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -52,7 +52,7 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
if (waitpid(pid, &return_value, 0) == -1) {
|
||||
/* (Alex) On the Mac this check gives a lot of false positives. I've commented out the code for now. */
|
||||
/*
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Error while waiting for run to finish.\nShutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
@ -71,19 +71,19 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
int signal = WTERMSIG(return_value);
|
||||
/** <li> Extract the exit status of the child. */
|
||||
MonteRun::ExitStatus exit_status = signal == SIGALRM ? MonteRun::MC_RUN_TIMED_OUT : MonteRun::MC_RUN_DUMPED_CORE;
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Run killed by signal %d: %s\n",
|
||||
machine_name.c_str(), slave_id, signal, strsignal(signal)) ;
|
||||
}
|
||||
connection_device.port = master_port;
|
||||
if (tc_connect(&connection_device) != TC_SUCCESS) {
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Lost connection to Master before results could be returned.\nShutting down.\n",
|
||||
machine_name.c_str(), slave_id) ;
|
||||
}
|
||||
slave_shutdown();
|
||||
}
|
||||
if (verbosity >= ALL) {
|
||||
if (verbosity >= MC_ALL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Sending run exit status to master %d.\n",
|
||||
machine_name.c_str(), slave_id, exit_status) ;
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ void Trick::MonteCarlo::spawn_slaves() {
|
||||
/** <ul><li> If the slave is in the MC_UNINITIALIZED state, then
|
||||
* set up the command string for starting the slave.
|
||||
*/
|
||||
if (slaves[i]->state == MonteSlave::UNINITIALIZED) {
|
||||
if (slaves[i]->state == MonteSlave::MC_UNINITIALIZED) {
|
||||
initialize_slave(slaves[i]) ;
|
||||
}
|
||||
}
|
||||
@ -64,12 +64,12 @@ void Trick::MonteCarlo::initialize_slave(Trick::MonteSlave* slave_to_init) {
|
||||
buffer += std::string("' &");
|
||||
}
|
||||
|
||||
if (verbosity >= INFORMATIONAL) {
|
||||
if (verbosity >= MC_INFORMATIONAL) {
|
||||
message_publish(MSG_INFO, "Monte: Spawning Slave %s:%d :\n%s\n",
|
||||
slave_to_init->machine_name.c_str(), slave_to_init->id, buffer.c_str()) ;
|
||||
}
|
||||
/** <li> Set the slave's state to INITIALIZING. */
|
||||
slave_to_init->state = MonteSlave::INITIALIZING;
|
||||
/** <li> Set the slave's state to MC_INITIALIZING. */
|
||||
slave_to_init->state = MonteSlave::MC_INITIALIZING;
|
||||
/** <li> Make the system call to execute the shell. */
|
||||
system(buffer.c_str());
|
||||
}
|
||||
@ -89,7 +89,7 @@ void Trick::MonteCarlo::default_slave_dispatch_pre_text(Trick::MonteSlave* slave
|
||||
case TRICK_USER_REMOTE_SH:
|
||||
if (slave_to_init->user_remote_shell.empty()) {
|
||||
slave_to_init->user_remote_shell = unix_ssh;
|
||||
if (verbosity >= ERROR) {
|
||||
if (verbosity >= MC_ERROR) {
|
||||
message_publish(MSG_WARNING, "Monte: TRICK_USER_REMOTE_SH specified for Slave %s:%d, but no shell given.\n"
|
||||
"Defaulting to %s.\n",
|
||||
machine_name.c_str(), slave_to_init->id, slave_to_init->user_remote_shell.c_str()) ;
|
||||
|
Loading…
Reference in New Issue
Block a user