mirror of
https://github.com/nasa/trick.git
synced 2025-01-29 15:43:57 +00:00
Fix enums in MonteCarlo ExitStatus to be less susceptible to conflict wi… (#628)
* Fix enums in MonteCarlo ExitStatus to be less susceptible to conflict with other code. Ref #609 * Add MC_ prefix to ExitStatus enums. Ref #609
This commit is contained in:
parent
1015f6ec90
commit
882b568b86
@ -36,13 +36,13 @@ namespace Trick {
|
||||
public:
|
||||
/** Details the manner in which this run exited. */
|
||||
enum ExitStatus {
|
||||
INCOMPLETE, /**< not completed */
|
||||
COMPLETE, /**< completed with no errors */
|
||||
CORED, /**< core dumped */
|
||||
TIMEDOUT, /**< timed out */
|
||||
NO_PERM, /**< could not write output files */
|
||||
BAD_INPUT, /**< problem parsing monte carlo input */
|
||||
UNKNOWN /**< unrecognized return code */
|
||||
MC_RUN_INCOMPLETE, /**< not completed */
|
||||
MC_RUN_COMPLETE, /**< completed with no errors */
|
||||
MC_RUN_DUMPED_CORE, /**< core dumped */
|
||||
MC_RUN_TIMED_OUT, /**< timed out */
|
||||
MC_CANT_CREATE_OUTPUT_DIR, /**< could not write output files */
|
||||
MC_PROBLEM_PARSING_INPUT, /**< problem parsing monte carlo input */
|
||||
MC_UNRECOGNIZED_RETURN_CODE /**< unrecognized return code */
|
||||
};
|
||||
|
||||
/** Unique identifier sequentially assigned, starting at zero, by the master. */
|
||||
@ -73,7 +73,7 @@ namespace Trick {
|
||||
num_tries(0),
|
||||
start_time(0),
|
||||
end_time(0),
|
||||
exit_status(INCOMPLETE) {}
|
||||
exit_status(MC_RUN_INCOMPLETE) {}
|
||||
|
||||
};
|
||||
|
||||
|
@ -278,7 +278,7 @@ int Trick::MonteCarlo::shutdown() {
|
||||
if (enabled && is_slave()) {
|
||||
connection_device.port = master_port;
|
||||
if (tc_connect(&connection_device) == TC_SUCCESS) {
|
||||
int exit_status = MonteRun::COMPLETE;
|
||||
int exit_status = MonteRun::MC_RUN_COMPLETE;
|
||||
if (verbosity >= ALL) {
|
||||
message_publish(MSG_INFO, "Monte [%s:%d] Sending run exit status to master: %d\n",
|
||||
machine_name.c_str(), slave_id, exit_status) ;
|
||||
@ -318,7 +318,7 @@ void Trick::MonteCarlo::handle_retry(MonteSlave& slave, MonteRun::ExitStatus exi
|
||||
|
||||
/** @par Detailed Design: */
|
||||
void Trick::MonteCarlo::resolve_run(MonteSlave& slave, MonteRun::ExitStatus exit_status) {
|
||||
if (exit_status != MonteRun::COMPLETE) {
|
||||
if (exit_status != MonteRun::MC_RUN_COMPLETE) {
|
||||
failed_runs.push_back(slave.current_run);
|
||||
}
|
||||
|
||||
@ -352,12 +352,12 @@ void Trick::MonteCarlo::check_timeouts() {
|
||||
* <ul><li> This run might have been redispatched due to a previous timeout for which the slave actually returned
|
||||
* data later. Only process this timeout if the run hasn't been resolved yet.
|
||||
*/
|
||||
if (slaves[i]->current_run->exit_status == MonteRun::INCOMPLETE) {
|
||||
if (slaves[i]->current_run->exit_status == MonteRun::MC_RUN_INCOMPLETE) {
|
||||
if (verbosity >= ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [Master] %s:%d has not responded for run %d.\n",
|
||||
slaves[i]->machine_name.c_str(), slaves[i]->id, slaves[i]->current_run->id) ;
|
||||
}
|
||||
handle_retry(*slaves[i], MonteRun::TIMEDOUT);
|
||||
handle_retry(*slaves[i], MonteRun::MC_RUN_TIMED_OUT);
|
||||
}
|
||||
/** </ul><li> Update the slave's state. */
|
||||
slaves[i]->state = slaves[i]->state == MonteSlave::RUNNING ?
|
||||
|
@ -92,9 +92,9 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
* this slave was marked as having timed out. If that is the case,
|
||||
* discard these results.
|
||||
*/
|
||||
if (slave.current_run->exit_status != MonteRun::INCOMPLETE) {
|
||||
if (slave.current_run->exit_status != MonteRun::MC_RUN_INCOMPLETE) {
|
||||
// TODO: If a slave times out or core dumps in its monte_slave_post
|
||||
// jobs, the master will receive a COMPLETE status from the slave's
|
||||
// jobs, the master will receive a MC_RUN_COMPLETE status from the slave's
|
||||
// child process and then an error status from the parent, rendering
|
||||
// this message incorrect.
|
||||
if (verbosity >= ALL) {
|
||||
@ -118,39 +118,39 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
|
||||
switch (exit_status) {
|
||||
|
||||
case MonteRun::COMPLETE:
|
||||
resolve_run(slave, MonteRun::COMPLETE);
|
||||
case MonteRun::MC_RUN_COMPLETE:
|
||||
resolve_run(slave, MonteRun::MC_RUN_COMPLETE);
|
||||
run_queue(&master_post_queue, "in master_post queue") ;
|
||||
break;
|
||||
|
||||
case MonteRun::BAD_INPUT:
|
||||
case MonteRun::MC_PROBLEM_PARSING_INPUT:
|
||||
if (verbosity >= ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported bad input for run %d. Skipping.\n",
|
||||
slave.machine_name.c_str(), slave.id, slave.current_run->id) ;
|
||||
}
|
||||
resolve_run(slave, MonteRun::BAD_INPUT);
|
||||
resolve_run(slave, MonteRun::MC_PROBLEM_PARSING_INPUT);
|
||||
break;
|
||||
|
||||
case MonteRun::CORED:
|
||||
case MonteRun::MC_RUN_DUMPED_CORE:
|
||||
if (verbosity >= ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported core dump for run %d. Skipping.\n",
|
||||
slave.machine_name.c_str(), slave.id, slave.current_run->id) ;
|
||||
}
|
||||
resolve_run(slave, MonteRun::CORED);
|
||||
resolve_run(slave, MonteRun::MC_RUN_DUMPED_CORE);
|
||||
break;
|
||||
|
||||
case MonteRun::NO_PERM:
|
||||
case MonteRun::MC_CANT_CREATE_OUTPUT_DIR:
|
||||
if (verbosity >= ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
"Monte [Master] %s:%d reported a failure to create output directories for run %d.\n",
|
||||
slave.machine_name.c_str(), slave.id, slave.current_run->id);
|
||||
}
|
||||
handle_retry(slave, MonteRun::NO_PERM);
|
||||
handle_retry(slave, MonteRun::MC_CANT_CREATE_OUTPUT_DIR);
|
||||
break;
|
||||
|
||||
/**
|
||||
@ -159,7 +159,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
* can occur when the master determines that a slave has timed out, and
|
||||
* then that slave itself reports a timeout. </ul>
|
||||
*/
|
||||
case MonteRun::TIMEDOUT:
|
||||
case MonteRun::MC_RUN_TIMED_OUT:
|
||||
if (verbosity >= ERROR) {
|
||||
message_publish(
|
||||
MSG_ERROR,
|
||||
@ -168,7 +168,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
}
|
||||
if (slave.state != MonteSlave::UNRESPONSIVE_RUNNING &&
|
||||
slave.state != MonteSlave::UNRESPONSIVE_STOPPING) {
|
||||
handle_retry(slave, MonteRun::TIMEDOUT);
|
||||
handle_retry(slave, MonteRun::MC_RUN_TIMED_OUT);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -179,7 +179,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
|
||||
"Monte [Master] %s:%d reported unrecognized exit status (%d) for run %d. Skipping.\n",
|
||||
slave.machine_name.c_str(), slave.id, exit_status, slave.current_run->id);
|
||||
}
|
||||
resolve_run(slave, MonteRun::UNKNOWN);
|
||||
resolve_run(slave, MonteRun::MC_UNRECOGNIZED_RETURN_CODE);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -70,7 +70,7 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
|
||||
int signal = WTERMSIG(return_value);
|
||||
/** <li> Extract the exit status of the child. */
|
||||
MonteRun::ExitStatus exit_status = signal == SIGALRM ? MonteRun::TIMEDOUT : MonteRun::CORED;
|
||||
MonteRun::ExitStatus exit_status = signal == SIGALRM ? MonteRun::MC_RUN_TIMED_OUT : MonteRun::MC_RUN_DUMPED_CORE;
|
||||
if (verbosity >= ERROR) {
|
||||
message_publish(MSG_ERROR, "Monte [%s:%d] Run killed by signal %d: %s\n",
|
||||
machine_name.c_str(), slave_id, signal, strsignal(signal)) ;
|
||||
@ -99,14 +99,14 @@ int Trick::MonteCarlo::slave_process_run() {
|
||||
} else {
|
||||
input[size] = '\0';
|
||||
if ( ip_parse(input) != 0 ) {
|
||||
exit(MonteRun::BAD_INPUT);
|
||||
exit(MonteRun::MC_PROBLEM_PARSING_INPUT);
|
||||
}
|
||||
|
||||
/** <ul><li> Create the run directory. */
|
||||
std::string output_dir = command_line_args_get_output_dir();
|
||||
if (access(output_dir.c_str(), F_OK) != 0) {
|
||||
if (mkdir(output_dir.c_str(), 0775) == -1) {
|
||||
exit(MonteRun::NO_PERM);
|
||||
exit(MonteRun::MC_CANT_CREATE_OUTPUT_DIR);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user