Fix enums in MonteCarlo ExitStatus to be less susceptible to conflict wi… (#628)

* Fix enums in MonteCarlo ExitStatus to be less susceptible to conflict with other code. ref #609

* Add MC_ prefix to ExitStatus enums. Ref #609
This commit is contained in:
jmpenn 2018-06-05 15:10:08 -05:00 committed by GitHub
parent 1015f6ec90
commit 882b568b86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 28 additions and 28 deletions

View File

@ -36,13 +36,13 @@ namespace Trick {
public:
/** Details the manner in which this run exited. */
enum ExitStatus {
INCOMPLETE, /**< not completed */
COMPLETE, /**< completed with no errors */
CORED, /**< core dumped */
TIMEDOUT, /**< timed out */
NO_PERM, /**< could not write output files */
BAD_INPUT, /**< problem parseing monte carlo input */
UNKNOWN /**< unrecognized return code */
MC_RUN_INCOMPLETE, /**< not completed */
MC_RUN_COMPLETE, /**< completed with no errors */
MC_RUN_DUMPED_CORE, /**< core dumped */
MC_RUN_TIMED_OUT, /**< timed out */
MC_CANT_CREATE_OUTPUT_DIR, /**< could not write output files */
MC_PROBLEM_PARSING_INPUT, /**< problem parseing monte carlo input */
MC_UNRECOGNIZED_RETURN_CODE /**< unrecognized return code */
};
/** Unique identifier sequentially assigned, starting at zero, by the master. */
@ -73,7 +73,7 @@ namespace Trick {
num_tries(0),
start_time(0),
end_time(0),
exit_status(INCOMPLETE) {}
exit_status(MC_RUN_INCOMPLETE) {}
};

View File

@ -278,7 +278,7 @@ int Trick::MonteCarlo::shutdown() {
if (enabled && is_slave()) {
connection_device.port = master_port;
if (tc_connect(&connection_device) == TC_SUCCESS) {
int exit_status = MonteRun::COMPLETE;
int exit_status = MonteRun::MC_RUN_COMPLETE;
if (verbosity >= ALL) {
message_publish(MSG_INFO, "Monte [%s:%d] Sending run exit status to master: %d\n",
machine_name.c_str(), slave_id, exit_status) ;
@ -318,7 +318,7 @@ void Trick::MonteCarlo::handle_retry(MonteSlave& slave, MonteRun::ExitStatus exi
/** @par Detailed Design: */
void Trick::MonteCarlo::resolve_run(MonteSlave& slave, MonteRun::ExitStatus exit_status) {
if (exit_status != MonteRun::COMPLETE) {
if (exit_status != MonteRun::MC_RUN_COMPLETE) {
failed_runs.push_back(slave.current_run);
}
@ -352,12 +352,12 @@ void Trick::MonteCarlo::check_timeouts() {
* <ul><li> This run might have been redispatched due to a previous timeout for which the slave actually returned
* data later. Only process this timeout if the run hasn't been resolved yet.
*/
if (slaves[i]->current_run->exit_status == MonteRun::INCOMPLETE) {
if (slaves[i]->current_run->exit_status == MonteRun::MC_RUN_INCOMPLETE) {
if (verbosity >= ERROR) {
message_publish(MSG_ERROR, "Monte [Master] %s:%d has not responded for run %d.\n",
slaves[i]->machine_name.c_str(), slaves[i]->id, slaves[i]->current_run->id) ;
}
handle_retry(*slaves[i], MonteRun::TIMEDOUT);
handle_retry(*slaves[i], MonteRun::MC_RUN_TIMED_OUT);
}
/** </ul><li> Update the slave's state. */
slaves[i]->state = slaves[i]->state == MonteSlave::RUNNING ?

View File

@ -92,9 +92,9 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
* this slave was marked as having timed out. If that is the case,
* discard these results.
*/
if (slave.current_run->exit_status != MonteRun::INCOMPLETE) {
if (slave.current_run->exit_status != MonteRun::MC_RUN_INCOMPLETE) {
// TODO: If a slave times out or core dumps in its monte_slave_post
// jobs, the master will receive a COMPLETE status from the slave's
// jobs, the master will receive a MC_RUN_COMPLETE status from the slave's
// child process and then an error status from the parent, rendering
// this message incorrect.
if (verbosity >= ALL) {
@ -118,39 +118,39 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
switch (exit_status) {
case MonteRun::COMPLETE:
resolve_run(slave, MonteRun::COMPLETE);
case MonteRun::MC_RUN_COMPLETE:
resolve_run(slave, MonteRun::MC_RUN_COMPLETE);
run_queue(&master_post_queue, "in master_post queue") ;
break;
case MonteRun::BAD_INPUT:
case MonteRun::MC_PROBLEM_PARSING_INPUT:
if (verbosity >= ERROR) {
message_publish(
MSG_ERROR,
"Monte [Master] %s:%d reported bad input for run %d. Skipping.\n",
slave.machine_name.c_str(), slave.id, slave.current_run->id) ;
}
resolve_run(slave, MonteRun::BAD_INPUT);
resolve_run(slave, MonteRun::MC_PROBLEM_PARSING_INPUT);
break;
case MonteRun::CORED:
case MonteRun::MC_RUN_DUMPED_CORE:
if (verbosity >= ERROR) {
message_publish(
MSG_ERROR,
"Monte [Master] %s:%d reported core dump for run %d. Skipping.\n",
slave.machine_name.c_str(), slave.id, slave.current_run->id) ;
}
resolve_run(slave, MonteRun::CORED);
resolve_run(slave, MonteRun::MC_RUN_DUMPED_CORE);
break;
case MonteRun::NO_PERM:
case MonteRun::MC_CANT_CREATE_OUTPUT_DIR:
if (verbosity >= ERROR) {
message_publish(
MSG_ERROR,
"Monte [Master] %s:%d reported a failure to create output directories for run %d.\n",
slave.machine_name.c_str(), slave.id, slave.current_run->id);
}
handle_retry(slave, MonteRun::NO_PERM);
handle_retry(slave, MonteRun::MC_CANT_CREATE_OUTPUT_DIR);
break;
/**
@ -159,7 +159,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
* can occur when the master determines that a slave has timed out, and
* then that slave itself reports a timeout. </ul>
*/
case MonteRun::TIMEDOUT:
case MonteRun::MC_RUN_TIMED_OUT:
if (verbosity >= ERROR) {
message_publish(
MSG_ERROR,
@ -168,7 +168,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
}
if (slave.state != MonteSlave::UNRESPONSIVE_RUNNING &&
slave.state != MonteSlave::UNRESPONSIVE_STOPPING) {
handle_retry(slave, MonteRun::TIMEDOUT);
handle_retry(slave, MonteRun::MC_RUN_TIMED_OUT);
}
break;
@ -179,7 +179,7 @@ void Trick::MonteCarlo::handle_run_data(Trick::MonteSlave& slave) {
"Monte [Master] %s:%d reported unrecognized exit status (%d) for run %d. Skipping.\n",
slave.machine_name.c_str(), slave.id, exit_status, slave.current_run->id);
}
resolve_run(slave, MonteRun::UNKNOWN);
resolve_run(slave, MonteRun::MC_UNRECOGNIZED_RETURN_CODE);
break;
}

View File

@ -70,7 +70,7 @@ int Trick::MonteCarlo::slave_process_run() {
int signal = WTERMSIG(return_value);
/** <li> Extract the exit status of the child. */
MonteRun::ExitStatus exit_status = signal == SIGALRM ? MonteRun::TIMEDOUT : MonteRun::CORED;
MonteRun::ExitStatus exit_status = signal == SIGALRM ? MonteRun::MC_RUN_TIMED_OUT : MonteRun::MC_RUN_DUMPED_CORE;
if (verbosity >= ERROR) {
message_publish(MSG_ERROR, "Monte [%s:%d] Run killed by signal %d: %s\n",
machine_name.c_str(), slave_id, signal, strsignal(signal)) ;
@ -99,14 +99,14 @@ int Trick::MonteCarlo::slave_process_run() {
} else {
input[size] = '\0';
if ( ip_parse(input) != 0 ) {
exit(MonteRun::BAD_INPUT);
exit(MonteRun::MC_PROBLEM_PARSING_INPUT);
}
/** <ul><li> Create the run directory. */
std::string output_dir = command_line_args_get_output_dir();
if (access(output_dir.c_str(), F_OK) != 0) {
if (mkdir(output_dir.c_str(), 0775) == -1) {
exit(MonteRun::NO_PERM);
exit(MonteRun::MC_CANT_CREATE_OUTPUT_DIR);
}
}