mirror of
https://github.com/nasa/trick.git
synced 2024-12-18 12:56:26 +00:00
Initial excision of DMTCP from Trick. #28
This commit is contained in:
parent
94f20d426a
commit
e487e5609f
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -14,5 +14,4 @@ tutorial.doc export-ignore
|
||||
users_guide.html export-ignore
|
||||
MonteCarlo_Presentation.pptx export-ignore
|
||||
Trick_07_to_10.pptx export-ignore
|
||||
Trick_Checkpointing_DMTCP.pptx export-ignore
|
||||
trick_source/data_products/DPX/test export-ignore
|
||||
|
@ -141,7 +141,6 @@ set( IO_SRC
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_CheckPointRestart.cpp
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_Clock.cpp
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_CommandLineArguments.cpp
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DMTCP.cpp
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DRAscii.cpp
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DRBinary.cpp
|
||||
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DRHDF5.cpp
|
||||
|
1
Makefile
1
Makefile
@ -29,7 +29,6 @@ SIM_SERV_DIRS = \
|
||||
${TRICK_HOME}/trick_source/sim_services/CommandLineArguments \
|
||||
${TRICK_HOME}/trick_source/sim_services/DataRecord \
|
||||
${TRICK_HOME}/trick_source/sim_services/DebugPause \
|
||||
${TRICK_HOME}/trick_source/sim_services/DMTCP \
|
||||
${TRICK_HOME}/trick_source/sim_services/EchoJobs \
|
||||
${TRICK_HOME}/trick_source/sim_services/Environment \
|
||||
${TRICK_HOME}/trick_source/sim_services/EventManager \
|
||||
|
@ -49,50 +49,6 @@ AC_DEFUN([AX_HDF5_HOME],[
|
||||
AC_SUBST([HDF5_HOME])
|
||||
])
|
||||
|
||||
AC_DEFUN([AX_DMTCP_HOME],[
|
||||
AC_ARG_WITH([dmtcp],
|
||||
AS_HELP_STRING([--with-dmtcp@<:@=DIR@:>@], [DMTCP root directory]),
|
||||
[DMTCP_HOME="$withval"
|
||||
AS_IF([test "$DMTCP_HOME" = "yes"],
|
||||
AC_CHECK_HEADER(dmtcpaware.h,[DMTCP_HOME="/usr"],AC_MSG_ERROR([could not find dmtcpaware.h])),
|
||||
AC_CHECK_FILE([$DMTCP_HOME/include/dmtcpaware.h],
|
||||
[],
|
||||
AC_MSG_ERROR([could not find $DMTCP_HOME/include/dmtcpaware.h])
|
||||
)
|
||||
)],
|
||||
[DMTCP_HOME=""]
|
||||
)
|
||||
AC_SUBST([DMTCP_HOME])
|
||||
])
|
||||
|
||||
AC_DEFUN([AX_TPRO_HOME],[
|
||||
AC_ARG_WITH([tpro],
|
||||
AS_HELP_STRING([--with-tpro@<:@=DIR@:>@], [TPRO root directory]),
|
||||
[TPRO_HOME="$withval"
|
||||
AC_CHECK_FILE([$TPRO_HOME/linux/h/tpro.h],
|
||||
[],
|
||||
AC_MSG_ERROR([could not find $TPRO_HOME/linux/h/tpro.h])
|
||||
)
|
||||
],
|
||||
[TPRO_HOME=""]
|
||||
)
|
||||
AC_SUBST([TPRO_HOME])
|
||||
])
|
||||
|
||||
AC_DEFUN([AX_BC635_HOME],[
|
||||
AC_ARG_WITH([bc635],
|
||||
AS_HELP_STRING([--with-bc635@<:@=DIR@:>@], [BC635 root directory]),
|
||||
[BC635_HOME="$withval"
|
||||
AC_CHECK_FILE([$BC635_HOME/sample/bcuser.h],
|
||||
[],
|
||||
AC_MSG_ERROR([could not find $BC635_HOME/sample/bcuser.h])
|
||||
)
|
||||
],
|
||||
[BC635_HOME=""]
|
||||
)
|
||||
AC_SUBST([BC635_HOME])
|
||||
])
|
||||
|
||||
AC_DEFUN([AX_GSL_HOME],[
|
||||
AC_ARG_WITH([gsl],
|
||||
AS_HELP_STRING([--with-gsl@<:@=DIR@:>@], [GSL root directory]),
|
||||
@ -571,13 +527,12 @@ AC_SUBST([UDUNITS_EXCLUDE])
|
||||
|
||||
dnl process the other optional command line arguments
|
||||
AX_HDF5_HOME([])
|
||||
AX_DMTCP_HOME([])
|
||||
AX_TPRO_HOME([])
|
||||
AX_BC635_HOME([])
|
||||
AX_GSL_HOME([])
|
||||
AX_GTEST_HOME([])
|
||||
AX_ER7_UTILS([])
|
||||
|
||||
AC_OUTPUT([share/trick/makefiles/config_user.mk])
|
||||
AC_CONFIG_FILES([share/trick/makefiles/config_user.mk])
|
||||
AC_OUTPUT
|
||||
|
||||
|
||||
|
||||
printf "\033@<:@32mconfigure script successfully completed\033@<:@0m\n"
|
||||
|
@ -110,35 +110,6 @@ new_slave.chkpnt_load_auto = 0
|
||||
|
||||
in which case your Slave would have to have its own model code to perform a checkpoint dump/load.
|
||||
|
||||
If the Slave simulation was built with DMTCP enabled (for more information see TBD DMTCP Section), you can set the following variable
|
||||
in the Master input file, causing the Slave to dump/load a binary DMTCP checkpoint when the Master commands:
|
||||
|
||||
```
|
||||
new_slave.chkpnt_binary = 1
|
||||
```
|
||||
|
||||
Note that when loading a binary DMTCP checkpoint, the checkpoint is a new executable file that must be run. When the
|
||||
Master commands a checkpoint (and chkpnt_load_auto=1), the Slave will send the Master its checkpoint file name path
|
||||
and then terminate itself. The Master will then automatically restart the Slave by executing that checkpoint file.
|
||||
If chkpnt_load_auto=0, the user is responsible for terminating and restarting the Slave.
|
||||
|
||||
To run the Slave with DMTCP enabled, you'll have to modify some of the Slave's attributes in the Master input file.
|
||||
|
||||
- S_main_name normally defaults to your Slave's executable, which Trick fills in for you. Instead, set it to run the dmtcp_checkpoint command, with the Slave's executable as its final argument.
|
||||
- run_input_file should not change from what your Slave would normally run, but it needs the keyword "dmtcp" specified after it.
|
||||
- sync_error_terminate needs to be 0 because the Slave must be terminated before the Master restarts it, and we don't want Master to also terminate.
|
||||
- reconnect_wait_limit is the time the Master will then wait for the Slave to be restarted and get reconnected with the Master. If the Slave does not
|
||||
reconnect within this time, the Master will deactivate the Slave for the rest of the simulation.
|
||||
|
||||
Here's an example:
|
||||
|
||||
```
|
||||
new_slave.S_main_name = "/users/bob/dmtcp-1.2.7/bin/dmtcp_checkpoint --quiet --new-coordinator --checkpoint-open-files --ckptdir ./dmtcp_checkpoints ./S_main_Linux_4.4_x86_64.exe"
|
||||
new_slave.run_input_file = "RUN_test/slave.py dmtcp"
|
||||
new_slave.sync_error_terminate = 0
|
||||
new_slave.reconnect_wait_limit = 10.0
|
||||
```
|
||||
|
||||
When chkpnt_load_auto=1, the Slave restarting and reconnecting should occur within a second or two. If chkpnt_load_auto=0, the user has
|
||||
to restart the Slave manually (and may even be typing in the checkpoint executable on the command line), so reconnect_wait_limit should be
|
||||
set accordingly.
|
||||
|
@ -57,21 +57,12 @@ namespace Trick {
|
||||
/** Times to dump a checkpoint. Saved as simulation tics.\n */
|
||||
std::priority_queue< long long, std::vector< long long >, std::greater< long long > > checkpoint_times ; /**< trick_units(--) */
|
||||
|
||||
/** Times to dump a dmtcp_checkpoint. Saved as simulation tics.\n */
|
||||
std::priority_queue< long long, std::vector< long long >, std::greater< long long > > dmtcp_checkpoint_times ; /**< trick_units(--) */
|
||||
|
||||
/** Period to dump a recurring checkpoint. Saved as simulation tics.\n */
|
||||
long long safestore_period ; /**< trick_units(--) */
|
||||
|
||||
/** Period to dump a recurring dmtcp checkpoint. Saved as simulation tics.\n */
|
||||
long long dmtcp_safestore_period ; /**< trick_units(--) */
|
||||
|
||||
/** Next time to dump a recurring checkpoint. Saved as simulation tics.\n */
|
||||
long long safestore_time ; /**< trick_units(--) */
|
||||
|
||||
/** Next time to dump a recurring dmtcp checkpoint. Saved as simulation tics.\n */
|
||||
long long dmtcp_safestore_time ; /**< trick_units(--) */
|
||||
|
||||
/** If true take a pre_init_checkpoint\n */
|
||||
bool pre_init_checkpoint ; /**< trick_units(--) */
|
||||
|
||||
@ -84,9 +75,6 @@ namespace Trick {
|
||||
/** If true enable taking safestore checkpoints\n */
|
||||
bool safestore_enabled ; /**< trick_units(--) */
|
||||
|
||||
/** If true enable taking dmtcp safestore checkpoints\n */
|
||||
bool dmtcp_safestore_enabled ; /**< trick_units(--) */
|
||||
|
||||
/** output_directory/checkpoint_file_name to dump for a checkpoint\n */
|
||||
std::string output_file ; /**< ** */
|
||||
|
||||
@ -147,15 +135,6 @@ namespace Trick {
|
||||
int set_safestore_enabled(bool yes_no) ;
|
||||
|
||||
/**
|
||||
@brief @userdesc Command to set the dmtcp_safestore_enabled flag. If dmtcp_safestore_enabled is set
|
||||
periodic checkpoints will be done according to dmtcp_safestore_period that was set in dmtcp_checkpoint_safestore().
|
||||
The checkpointed file name is @e dmtcp_chkpnt_safestore.
|
||||
@par Python Usage:
|
||||
@code trick.dmtcp_checkpoint_safestore_set_enabled(<yes_no>) @endcode
|
||||
@param yes_no - boolean yes (C integer 1) = dump periodic checkpoint, no (C integer 0) = do not dump
|
||||
@return always 0
|
||||
*/
|
||||
int dmtcp_set_safestore_enabled(bool yes_no) ;
|
||||
|
||||
/**
|
||||
@brief @userdesc Command to get the name of the checkpoint dump file.
|
||||
@ -200,16 +179,6 @@ namespace Trick {
|
||||
*/
|
||||
int set_safestore_time(double in_time) ;
|
||||
|
||||
/**
|
||||
@brief @userdesc Command to set the desired period that dmtcp safestore checkpoints will be dumped. (Sets dmtcp_safestore_period to the integral time tic value corresponding
|
||||
to the incoming in_time so that checkpoint occurs periodically.)
|
||||
@par Python Usage:
|
||||
@code trick.dmtcp_checkpoint_safestore(<in_time>) @endcode
|
||||
@param in_time - desired dmtcp safestore checkpoint time period in seconds.
|
||||
@return always 0
|
||||
*/
|
||||
int dmtcp_set_safestore_time(double in_time) ;
|
||||
|
||||
/**
|
||||
@brief @userdesc Command to dump a checkpoint now to the specified file.
|
||||
Calls the MemoryManager checkpoint method with the string argument file_name
|
||||
@ -235,34 +204,6 @@ namespace Trick {
|
||||
*/
|
||||
virtual int checkpoint(double in_time) ;
|
||||
|
||||
/**
|
||||
@brief @userdesc Command to dump a checkpoint now to the specified file.
|
||||
Calls the MemoryManager checkpoint method with the string argument file_name
|
||||
and sim objects list string separated by "," to specify which sim objects need
|
||||
checkpointing. If sim objects are not specified, all will be checkpointed.
|
||||
@par Python Usage:
|
||||
@code trick.dmtcp_checkpoint() @endcode
|
||||
@param file_name - optional: name of checkpoint file to dump (default is "dmtcp_chkpnt_<time>")
|
||||
@param print_status - optional: boolean yes (C integer 1) = print the dump checkpoint status message
|
||||
@param obj_list_str - optional: sim objects list string for checkpointing (default is dump all)
|
||||
@return always 0
|
||||
*/
|
||||
virtual int dmtcp_checkpoint(std::string file_name = "") ;
|
||||
|
||||
/**
|
||||
@brief @userdesc Command to dump a checkpoint at in_time. (Sets checkpoint_time to the integral time tic value corresponding
|
||||
to the incoming in_time so that checkpoint occurs once at that time at the end of the execution frame.)
|
||||
The checkpointed file name is @e dmtcp_chkpnt_<in_time>.
|
||||
@par Python Usage:
|
||||
@code trick.dmtcp_checkpoint(<in_time>) @endcode
|
||||
@param in_time - desired checkpoint time in seconds.
|
||||
@return always 0
|
||||
*/
|
||||
virtual int dmtcp_checkpoint(double in_time) ;
|
||||
|
||||
/* helper function to generate DMTCP restart script file name */
|
||||
void setDMTCPFilename( std::string file_name = "");
|
||||
|
||||
/**
|
||||
* Executes the pre_init_checkpoint
|
||||
* @return always 0
|
||||
@ -289,14 +230,6 @@ namespace Trick {
|
||||
*/
|
||||
virtual int write_checkpoint() ;
|
||||
|
||||
/**
|
||||
* Creates a file name based on the simulation time, "checkpoint_<time>" and
|
||||
* calls checkpoint(string) routine with the filename
|
||||
* @param sim_time_tics - current simulation time
|
||||
* @return always 0
|
||||
*/
|
||||
virtual int write_dmtcp_checkpoint() ;
|
||||
|
||||
/**
|
||||
* Creates a file name based on the simulation time, "checkpoint_<time>" and
|
||||
* calls checkpoint(string) routine with the filename
|
||||
|
@ -46,15 +46,6 @@ const char * checkpoint_get_load_file() ;
|
||||
/* checkpoint call accessible from C code */
|
||||
int checkpoint( const char * file_name );
|
||||
|
||||
/* set dmtcp safestore_enabled flag */
|
||||
int dmtcp_checkpoint_safestore(int yes_no) ;
|
||||
|
||||
/* dmtcp safestore checkpoint call accessible from C code */
|
||||
int dmtcp_checkpoint_safestore_period( double in_period ) ;
|
||||
|
||||
/* dmtcp checkpoint call accessible from C code */
|
||||
int dmtcp_checkpoint( const char * file_name );
|
||||
|
||||
/* checkpoint for specific sim objects call from C code */
|
||||
int checkpoint_objects( const char * file_name, const char * objects ) ;
|
||||
|
||||
|
@ -1,68 +0,0 @@
|
||||
/*
|
||||
PURPOSE:
|
||||
(DMTCP)
|
||||
*/
|
||||
|
||||
#ifndef DMTCP_HH
|
||||
#define DMTCP_HH
|
||||
|
||||
#ifdef _DMTCP
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "trick/dmtcp_checkpoint_c_intf.hh"
|
||||
#include "trick/Scheduler.hh"
|
||||
|
||||
namespace Trick {
|
||||
class DMTCP : public Trick::Scheduler {
|
||||
|
||||
public:
|
||||
|
||||
DMTCP() ;
|
||||
|
||||
int init() ;
|
||||
int freeze() ;
|
||||
int pre() ;
|
||||
int post() ;
|
||||
int restart() ;
|
||||
int call_dmtcp();
|
||||
|
||||
void dmtcpSetCheckpointNow() ;
|
||||
void dmtcpJobQueue( std::string file_name );
|
||||
void renameRestartScript();
|
||||
void dmtcpSystemCmd( const std::string& str );
|
||||
void dmtcpCleanup();
|
||||
void dmtcpSafestoreDir();
|
||||
void dmtcpRenameCmd();
|
||||
|
||||
bool isSpecialCharacter( const std::string& str );
|
||||
|
||||
std::string getScriptName();
|
||||
std::string splitFilename( const std::string& str );
|
||||
|
||||
unsigned int dmtcp_ckpt_complete;
|
||||
std::priority_queue<std::string> dmtcp_checkpoint_jobs_queue ; // ** ignore this for checkpointing
|
||||
|
||||
virtual int write_s_job_execution( FILE * fp ) ;
|
||||
virtual int instrument_job_before(Trick::JobData * instrument_job) ;
|
||||
virtual int instrument_job_after(Trick::JobData * instrument_job) ;
|
||||
virtual int instrument_job_remove(std::string in_job) ;
|
||||
|
||||
private:
|
||||
|
||||
bool checkpoint_now ;
|
||||
char prev_checkpoint_dirname[1024] ;
|
||||
|
||||
/** queue to hold jobs to be called before a checkpoint is dumped. */
|
||||
Trick::ScheduledJobQueue pre_queue ; /* ** */
|
||||
|
||||
/** queue to hold jobs to be called after a checkpoint is dumped. */
|
||||
Trick::ScheduledJobQueue post_queue ; /* ** */
|
||||
|
||||
/** queue to hold jobs to be called after a checkpoint is reloaded. */
|
||||
Trick::ScheduledJobQueue restart_queue ; /* ** */
|
||||
} ;
|
||||
}
|
||||
#endif
|
@ -85,9 +85,6 @@ namespace Trick {
|
||||
/** @brief Calls init() to create thread for writing simulation data to disk during restart. */
|
||||
int restart() ;
|
||||
|
||||
/** @brief Calls restart of all data recording groups. */
|
||||
int dmtcp_restart() ;
|
||||
|
||||
/** @brief Stop data recording during simulation shutdown. */
|
||||
virtual int shutdown() ;
|
||||
|
||||
|
@ -287,12 +287,6 @@ namespace Trick {
|
||||
*/
|
||||
virtual int restart() ;
|
||||
|
||||
/**
|
||||
@brief Restart data recording, add data_record job to the executive scheduler.
|
||||
@returns always 0
|
||||
*/
|
||||
virtual int dmtcp_restart() ;
|
||||
|
||||
/**
|
||||
@brief Writes format specific information to the top line of the log header file.
|
||||
@returns always 0
|
||||
|
@ -66,7 +66,7 @@ namespace Trick {
|
||||
MSQ_DECLARE (master_command, MS_SIM_COMMAND)
|
||||
MSQ_DECLARE (slave_command, MS_SIM_COMMAND)
|
||||
// checkpoint data is not sent every frame, so don't need a queue
|
||||
int slave_port; /**< trick_units(--) slave's dmtcp checkpoint port */
|
||||
int slave_port; /**< trick_units(--) slave's checkpoint port */
|
||||
char chkpnt_name[256]; /**< trick_units(--) checkpoint dir/filename */
|
||||
} MSSharedMemData;
|
||||
|
||||
@ -140,7 +140,7 @@ namespace Trick {
|
||||
virtual MS_SIM_COMMAND read_command() ;
|
||||
|
||||
/**
|
||||
@brief Read a port number (i.e. dmtcp port) from the other simulation.
|
||||
@brief Read a port number from the other simulation.
|
||||
@return the port read or MS_ERROR_PORT if the read failed
|
||||
*/
|
||||
virtual int read_port() ;
|
||||
@ -164,7 +164,7 @@ namespace Trick {
|
||||
virtual int write_command(MS_SIM_COMMAND command) ;
|
||||
|
||||
/**
|
||||
@brief Writes a port number (i.e. dmtcp port) to the other simulation.
|
||||
@brief Writes a port number to the other simulation.
|
||||
@return the number of bytes written
|
||||
*/
|
||||
virtual int write_port(int port) ;
|
||||
|
@ -98,7 +98,7 @@ namespace Trick {
|
||||
virtual MS_SIM_COMMAND read_command() ;
|
||||
|
||||
/**
|
||||
@brief Read a port number (i.e. dmtcp port) from the other simulation. Calls tc_read.
|
||||
@brief Read a port number from the other simulation. Calls tc_read.
|
||||
@return the port read or MS_ERROR_PORT if the read failed
|
||||
*/
|
||||
virtual int read_port() ;
|
||||
@ -122,7 +122,7 @@ namespace Trick {
|
||||
virtual int write_command(MS_SIM_COMMAND command) ;
|
||||
|
||||
/**
|
||||
@brief Writes a port number (i.e. dmtcp port) to the other simulation. Calls tc_write.
|
||||
@brief Writes a port number to the other simulation. Calls tc_write.
|
||||
@return the number of bytes written
|
||||
*/
|
||||
virtual int write_port(int port) ;
|
||||
|
@ -60,12 +60,9 @@ namespace Trick {
|
||||
in which case the master will no longer communicate with the slave.\n */
|
||||
bool activated ; /**< trick_units(--) */
|
||||
|
||||
/** Indicates "dmtcp" or "ascii" slave. Used to construct sync_port_tag (default is "undefined").\n*/
|
||||
/** Indicates "ascii" slave. Used to construct sync_port_tag (default is "undefined").\n*/
|
||||
std::string slave_type; /**< trick_units(--) */
|
||||
|
||||
/** Slave's dmtcp port if slave_type "dmtcp" (=0 if slave_type "ascii").\n*/
|
||||
long long dmtcp_port; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc Which remote shell shall the master use to start the slave.\n
|
||||
TRICK_SSH means use ssh (the default), TRICK_RSH means use rsh, TRICK_USER_REMOTE_SH means use custom.\n */
|
||||
Trick::RemoteShell remote_shell; /**< trick_units(--) */
|
||||
@ -92,16 +89,10 @@ namespace Trick {
|
||||
/** @userdesc When master dumps a checkpoint, command the slave to dump a checkpoint (default=true).\n */
|
||||
bool chkpnt_dump_auto; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc When master loads a checkpoint, command the slave to load a checkpoint (default=true).\n
|
||||
If chkpnt_binary is true, the slave will terminate and the master will load the slave's dmtcp checkpoint.\n */
|
||||
/** @userdesc When master loads a checkpoint, command the slave to load a checkpoint (default=true).\n */
|
||||
bool chkpnt_load_auto; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc When master dumps/loads a checkpoint, this indicates the format of the slave checkpoint.\n
|
||||
The default = false which means the typical trick ascii checkpoint. True means a dmtcp checkpoint.\n */
|
||||
bool chkpnt_binary; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc Send master's checkpoint file name to slave from here.\n
|
||||
Also used to read dmtcp checkpoint file name from slave when loading dmtcp checkpoint.\n **/
|
||||
/** @userdesc Send master's checkpoint file name to slave from here.\n **/
|
||||
char chkpnt_name[256]; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc The "RUN_<dir>/<input_file>" of the slave to use as the parameter to S_main_name.\n */
|
||||
@ -166,12 +157,6 @@ namespace Trick {
|
||||
*/
|
||||
int write_master_chkpnt_name(std::string full_path_name) ;
|
||||
|
||||
/**
|
||||
@brief Restart the slave's DMTCP executable after 1) killing its dmtcp_coordinator, and
|
||||
2) disconnecting and starting a new master/slave socket connection.
|
||||
@return always 0
|
||||
*/
|
||||
int restart_dmtcp_slave();
|
||||
} ;
|
||||
|
||||
/**
|
||||
|
@ -36,7 +36,7 @@ namespace Trick {
|
||||
/* True when this slave is enabled (default is false), see process_sim_args.\n */
|
||||
bool enabled ; /**< trick_units(--) */
|
||||
|
||||
/* True when this slave has reconnected (dmtcp_restart) */
|
||||
/* True when this slave has reconnected */
|
||||
bool reconnected ; /**< trick_units(--) */
|
||||
|
||||
/* True when this slave has sent ReconnectCmd to master (default is false).\n */
|
||||
@ -45,8 +45,7 @@ namespace Trick {
|
||||
/* True when this slave has published a message that it has reconnected to master(default is false).\n */
|
||||
bool msg_published; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc Read in master's checkpoint file name here, used for forming slave's checkpoint file name.\n
|
||||
Also used to send slave's dmtcp checkpoint file name to master when loading dmtcp checkpoint.\n **/
|
||||
/** @userdesc Read in master's checkpoint file name here, used for forming slave's checkpoint file name.\n **/
|
||||
char chkpnt_name[256]; /**< trick_units(--) */
|
||||
|
||||
/** @userdesc True means terminate the slave if it loses synchronization with the master.\n
|
||||
@ -85,12 +84,6 @@ namespace Trick {
|
||||
*/
|
||||
int init() ;
|
||||
|
||||
/**
|
||||
@brief Reconnect master/slave communications for dmtcp restart.
|
||||
@return always 0
|
||||
*/
|
||||
int dmtcp_restart() ;
|
||||
|
||||
/**
|
||||
@brief Read the master's checkpoint name.
|
||||
@return the appropriate name to use for the current dump/load command
|
||||
|
@ -1 +0,0 @@
|
||||
#include "trick/DMTCP.hh"
|
@ -1 +0,0 @@
|
||||
#include "trick/dmtcp_checkpoint_c_intf.hh"
|
@ -1,38 +0,0 @@
|
||||
/*
|
||||
PURPOSE:
|
||||
(DMTCP C interface)
|
||||
*/
|
||||
|
||||
#ifndef DMTCP_CHECKPOINT_C_INTF_HH
|
||||
#define DMTCP_CHECKPOINT_C_INTF_HH
|
||||
|
||||
#ifdef _DMTCP
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* call DMTCP function dmtcpIsEnabled() */
|
||||
int dmtcp_is_enabled() ;
|
||||
|
||||
/* call DMTCP function dmtcpRunCommand() */
|
||||
int call_dmtcp();
|
||||
|
||||
/* DMTCP jobs to run pre, post, and restart. Arguments for dmtcpInstallHooks() - defined in dmtcp/src/dmtcpaware.c */
|
||||
void dmtcp_pre_checkpoint() ;
|
||||
void dmtcp_post_checkpoint() ;
|
||||
void dmtcp_restart() ;
|
||||
|
||||
/* set DMTCP checkpoint flag */
|
||||
void dmtcp_set_checkpoint_now() ;
|
||||
|
||||
/* queue to store dmtcp job names */
|
||||
void dmtcp_job_queue( std::string file_name );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -4,7 +4,6 @@
|
||||
#include "trick/reference_frame.h"
|
||||
#include "trick/GetTimeOfDayClock.hh"
|
||||
#include "trick/CommandLineArguments.hh"
|
||||
#include "trick/DMTCP.hh"
|
||||
#include "trick/Executive.hh"
|
||||
#include "trick/ExecutiveException.hh"
|
||||
#include "trick/Environment.hh"
|
||||
|
@ -28,8 +28,6 @@ typedef enum {
|
||||
MS_ReconnectCmd = 12, /* Master/Slave: slave reconnected after chkpnt load */
|
||||
MS_ChkpntDumpAsciiCmd = 13, /* Master/Slave: master tells slave to dump an ascii chkpnt */
|
||||
MS_ChkpntLoadAsciiCmd = 14, /* Master/Slave: master tells slave to load an ascii chkpnt */
|
||||
MS_ChkpntDumpBinCmd = 15, /* Master/Slave: master tells slave to dump a binary chkpnt */
|
||||
MS_ChkpntLoadBinCmd = 16 /* Master/Slave: master tells slave to load a binary chkpnt */
|
||||
|
||||
} MS_SIM_COMMAND;
|
||||
|
||||
|
@ -122,8 +122,6 @@ typedef struct {
|
||||
int socket_type ; /* ** Socket type SOCK_STREAM or SOCK_DGRAM */
|
||||
TCCommFlag disabled; /* -- Connection enabled flag */
|
||||
TCCommFlag disable_handshaking; /* -- Set this to skip handshaking in tc_connect and tc_accept. */
|
||||
int dmtcp_use_real ; /* -- if true, use dlsym to lookup connect/accept calls. Used with DMTCP sockets
|
||||
if false call connect/accept directly. */
|
||||
double blockio_limit; /* s Block I/O timeout limit */
|
||||
TCCommBlocking blockio_type; /* -- E.g. spinloops */
|
||||
unsigned char byte_info[TC_BYTE_INFO_LENGTH]; /* ** byte swap info */
|
||||
|
Binary file not shown.
@ -30,10 +30,6 @@ PLATFORM_LIBS = -lrt
|
||||
LD_WHOLE_ARCHIVE := -Wl,-whole-archive
|
||||
LD_NO_WHOLE_ARCHIVE := -Wl,-no-whole-archive
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
PLATFORM_LIBS += -L$(DMTCP)/dmtcpaware -ldmtcpaware
|
||||
endif
|
||||
|
||||
ifneq ($(TPROCTE),)
|
||||
TRICK_ADDITIONAL_CXXFLAGS += -D_TPRO_CTE -I$(TPROCTE)/linux/h -I$(TPROCTE)/linux/tsync/h -I$(TPROCTE)/common/h -I$(TPROCTE)/common/tsync/h
|
||||
PLATFORM_LIBS += $(TPROCTE)/linux/tsync/lib/libtsync.a $(TPROCTE)/linux/lib/libtpro.a
|
||||
|
@ -38,9 +38,6 @@ UDUNITS_LDFLAGS = @UDUNITS_LDFLAGS@
|
||||
TRICK_EXCLUDE += :@UDUNITS_EXCLUDE@
|
||||
|
||||
HDF5 = @HDF5_HOME@
|
||||
DMTCP = @DMTCP_HOME@
|
||||
TPROCTE = @TPRO_HOME@
|
||||
BC635 = @BC635_HOME@
|
||||
GSL_HOME = @GSL_HOME@
|
||||
GTEST_HOME = @GTEST_HOME@
|
||||
USE_ER7_UTILS = @USE_ER7_UTILS@
|
||||
|
@ -56,9 +56,6 @@ GTEST_HOME = @GTEST_HOME@
|
||||
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
|
||||
X_LIB_DIR = @X_LIB_DIR@
|
||||
MOTIF_HOME = @MOTIF_HOME@
|
||||
DMTCP = @DMTCP_HOME@
|
||||
TPROCTE = @TPRO_HOME@
|
||||
BC635 = @BC635_HOME@
|
||||
USE_ER7_UTILS = @USE_ER7_UTILS@
|
||||
|
||||
|
||||
|
@ -19,7 +19,6 @@ a replacement SimObject will create an uncompilable sim.
|
||||
#define TRICK_NO_MASTERSLAVE
|
||||
#define TRICK_NO_INSTRUMENTATION
|
||||
#define TRICK_NO_INTEGRATE
|
||||
#define TRICK_NO_DMTCP
|
||||
#define TRICK_NO_REALTIMEINJECTOR
|
||||
#define TRICK_NO_ZEROCONF
|
||||
#define TRICK_NO_UNITTEST
|
||||
@ -33,7 +32,6 @@ a replacement SimObject will create an uncompilable sim.
|
||||
##include <cstdarg>
|
||||
|
||||
##include "trick/SimObject.hh"
|
||||
##include "trick/DMTCP.hh"
|
||||
##include "trick/exec_proto.h"
|
||||
##include "trick/exec_proto.hh"
|
||||
##include "trick/Executive.hh"
|
||||
@ -146,11 +144,6 @@ class SysSimObject : public Trick::SimObject {
|
||||
//{TRK} P0 ("restart") sched.restart() ;
|
||||
{TRK} P65534 ("restart") exec_collect_init() ;
|
||||
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} ("dmtcp_restart") sched.write_s_job_execution(NULL) ;
|
||||
{TRK} ("dmtcp_restart") sched.write_s_run_summary(NULL) ;
|
||||
#endif
|
||||
|
||||
{TRK} ("system_moding") sched.sched_freeze_to_exec_command(false) ;
|
||||
{TRK} ("end_of_frame") sched.sched_freeze_to_exec_command(true) ;
|
||||
{TRK} ("end_of_frame") sched.async_freeze_to_exec_command() ;
|
||||
@ -217,7 +210,6 @@ class CheckPointRestartSimObject : public Trick::SimObject {
|
||||
{TRK} P1 ("initialization") cpr.write_pre_init_checkpoint() ;
|
||||
{TRK} P65535 ("initialization") cpr.write_post_init_checkpoint() ;
|
||||
{TRK} P0 ("system_checkpoint") cpr.write_checkpoint() ;
|
||||
{TRK} P0 ("top_of_frame") cpr.write_dmtcp_checkpoint() ;
|
||||
{TRK} P0 ("system_checkpoint") cpr.safestore_checkpoint() ;
|
||||
|
||||
{TRK} P0 ("shutdown") cpr.write_end_checkpoint() ;
|
||||
@ -285,9 +277,6 @@ class MessageSimObject : public Trick::SimObject {
|
||||
{TRK} ("exec_time_tic_changed") mpublisher.init() ;
|
||||
|
||||
{TRK} P1 ("restart") mdevice.restart() ;
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} P1 ("dmtcp_restart") mdevice.restart() ;
|
||||
#endif
|
||||
{TRK} ("shutdown") mtcout.shutdown() ;
|
||||
{TRK} ("shutdown") mdevice.shutdown() ;
|
||||
|
||||
@ -463,10 +452,6 @@ class VariableServerSimObject : public Trick::SimObject {
|
||||
#endif
|
||||
{TRK} P1 ("initialization") Trick::launch_all_external_applications();
|
||||
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} P1 ("dmtcp_restart") vs.restart() ;
|
||||
{TRK} P1 ("dmtcp_restart") Trick::launch_all_external_applications();
|
||||
#endif
|
||||
{TRK} ("preload_checkpoint") vs.suspendPreCheckpointReload();
|
||||
{TRK} ("restart") vs.restart();
|
||||
{TRK} ("restart") vs.resumePostCheckpointReload();
|
||||
@ -535,9 +520,6 @@ class DataRecordDispatcherSimObject : public Trick::SimObject {
|
||||
{TRK} ("end_of_frame") drd.signal_thread() ;
|
||||
{TRK} ("preload_checkpoint") drd.preload_checkpoint() ;
|
||||
{TRK} ("restart") drd.restart() ;
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} ("dmtcp_restart") drd.dmtcp_restart() ;
|
||||
#endif
|
||||
{TRK} ("shutdown") drd.shutdown() ;
|
||||
}
|
||||
|
||||
@ -567,9 +549,6 @@ class RTSyncSimObject : public Trick::SimObject {
|
||||
{TRK} P65535 ("initialization") rt_sync.get_sim_end_init_time() ;
|
||||
|
||||
{TRK} P65535 ("restart") rt_sync.restart(exec_get_time_tics()) ;
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} P65535 ("dmtcp_restart") rt_sync.restart(exec_get_time_tics()) ;
|
||||
#endif
|
||||
|
||||
{TRK} ("freeze_init") rt_sync.freeze_init(exec_get_freeze_frame()) ;
|
||||
{TRK} P65535 ("freeze") rt_sync.freeze_pause(exec_get_freeze_frame()) ;
|
||||
@ -638,10 +617,6 @@ class MasterSlaveSimObject : public Trick::SimObject {
|
||||
{TRK} P0 ("initialization") slave.init() ;
|
||||
{TRK} ("checkpoint") master.checkpoint() ;
|
||||
{TRK} ("preload_checkpoint") master.preload_checkpoint() ;
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} ("dmtcp_pre") master.checkpoint() ;
|
||||
{TRK} ("dmtcp_restart") slave.dmtcp_restart() ;
|
||||
#endif
|
||||
|
||||
{TRK} P65534 ("end_of_frame") master.end_of_frame_status_from_slave() ; // must occur BEFORE rt_monitor
|
||||
{TRK} P65535 ("end_of_frame") master.end_of_frame_status_to_slave() ; // must occur AFTER rt_monitor
|
||||
@ -727,21 +702,6 @@ class IntegLoopSimObject : public Trick::SimObject {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
class DMTCPSimObject : public Trick::SimObject {
|
||||
public:
|
||||
Trick::DMTCP dmtcp ;
|
||||
DMTCPSimObject() {
|
||||
exec_register_scheduler(&dmtcp) ;
|
||||
|
||||
{TRK} ("initialization") dmtcp.init() ;
|
||||
{TRK} ("freeze") dmtcp.freeze() ;
|
||||
}
|
||||
}
|
||||
|
||||
DMTCPSimObject trick_dmtcp ;
|
||||
#endif
|
||||
|
||||
#ifndef TRICK_NO_REALTIMEINJECTOR
|
||||
class InjectorExecSimObject : public Trick::SimObject {
|
||||
public:
|
||||
|
@ -34,7 +34,6 @@ if hasattr(top.cvar, 'trick_message'):
|
||||
# from checkpoint restart
|
||||
if hasattr(top.cvar, 'trick_cpr'):
|
||||
checkpoint = top.cvar.trick_cpr.cpr.checkpoint
|
||||
dmtcp_checkpoint = top.cvar.trick_cpr.cpr.dmtcp_checkpoint
|
||||
load_checkpoint = top.cvar.trick_cpr.cpr.load_checkpoint
|
||||
|
||||
# from the input processor
|
||||
|
@ -45,11 +45,6 @@ class SysSimObject : public Trick::SimObject {
|
||||
//{TRK} P0 ("restart") sched.restart() ;
|
||||
{TRK} P65534 ("restart") exec_collect_init() ;
|
||||
|
||||
#ifndef TRICK_NO_DMTCP
|
||||
{TRK} ("dmtcp_restart") sched.write_s_job_execution(NULL) ;
|
||||
{TRK} ("dmtcp_restart") sched.write_s_run_summary(NULL) ;
|
||||
#endif
|
||||
|
||||
{TRK} ("system_moding") sched.sched_freeze_to_exec_command(false) ;
|
||||
{TRK} ("end_of_frame") sched.sched_freeze_to_exec_command(true) ;
|
||||
{TRK} ("end_of_frame") sched.async_freeze_to_exec_command() ;
|
||||
|
@ -1,29 +0,0 @@
|
||||
import trick
|
||||
from trick.sim_objects import *
|
||||
|
||||
def main():
|
||||
|
||||
trick.sim_control_panel_set_enabled(True)
|
||||
trick.exec_set_freeze_command(True)
|
||||
|
||||
trick.real_time_enable()
|
||||
trick.exec_set_software_frame(0.0125)
|
||||
|
||||
trick.frame_log_on()
|
||||
|
||||
trick.dmtcp_checkpoint("test_checkpoint")
|
||||
|
||||
trick.dmtcp_checkpoint(0)
|
||||
trick.dmtcp_checkpoint(3)
|
||||
trick.dmtcp_checkpoint(9)
|
||||
|
||||
trick.add_read(25, "trick.dmtcp_checkpoint()")
|
||||
trick.dmtcp_checkpoint_safestore_set_enabled(True)
|
||||
trick.dmtcp_checkpoint_safestore(15)
|
||||
|
||||
|
||||
my_integ_loop.getIntegrator(trick.Runge_Kutta_2, 4);
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -13,9 +13,6 @@ new_connection = trick.MSSocket()
|
||||
new_slave = trick.SlaveInfo()
|
||||
new_slave.set_connection_type(new_connection)
|
||||
new_slave.sim_path = trick.get_trick_env("TRICK_HOME") + "/trick_sims/SIM_Ball++_L1"
|
||||
#if trick.dmtcp_is_enabled() == 1:
|
||||
# new_slave.S_main_name = "dmtcp_checkpoint --checkpoint-open-files ./S_main_${TRICK_HOST_CPU}.exe"
|
||||
#print "hey " , trick.dmtcp_is_enabled()
|
||||
new_slave.run_input_file = "RUN_slave/slave.py"
|
||||
new_slave.sync_error_terminate = 1
|
||||
trick_master_slave.master.add_slave(new_slave)
|
||||
|
@ -171,7 +171,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
private static String host;
|
||||
private static int port = -1;
|
||||
private static boolean isRestartOptionOn;
|
||||
private static boolean isDmtcpOptionOn;
|
||||
|
||||
// The object of SimState that has Sim state data.
|
||||
private SimState simState;
|
||||
@ -278,20 +277,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
actionController.handleFreeze(debug_flag);
|
||||
}
|
||||
|
||||
@Action
|
||||
public void dumpDMTCPChkpnt() {
|
||||
|
||||
if ( isDmtcpOptionOn ) {
|
||||
String chkpt_dir = simState.getRunPath();
|
||||
String fileName = "dmtcp_chkpnt_" + simState.getTwoFractionFormatted(simState.getExecOutTime());
|
||||
|
||||
actionController.handleDumpDMTCPChkpnt(chkpt_dir, fileName, getMainFrame());
|
||||
|
||||
runtimeStatePanel.setTitle("Dumping DMTCP Checkpoint");
|
||||
currentSimStatusDesc = "PreCheckpoint";
|
||||
}
|
||||
}
|
||||
|
||||
@Action
|
||||
public void shutdownSim() {
|
||||
actionController.handleShutdown();
|
||||
@ -692,15 +677,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
commandLine = matcher.replaceAll("");
|
||||
}
|
||||
|
||||
// check to see if -dmtcp is used
|
||||
Pattern dmtcpOptionPattern = Pattern.compile("(\\-dmtcp)(,|$)");
|
||||
Matcher dmtcp_matcher = dmtcpOptionPattern.matcher(commandLine);
|
||||
|
||||
if (dmtcp_matcher.find()) {
|
||||
isDmtcpOptionOn = true;
|
||||
commandLine = dmtcp_matcher.replaceAll("");
|
||||
}
|
||||
|
||||
// check to see if -auto_exit is used
|
||||
Pattern autoExitOptionPattern = Pattern.compile("(\\-auto\\_exit)(,|$)");
|
||||
Matcher autoExitMatcher = autoExitOptionPattern.matcher(commandLine);
|
||||
@ -995,13 +971,8 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
titledCommandsPanel.setContentContainer(commandsPanel);
|
||||
|
||||
GridLayout gridLayout = null;
|
||||
if (!isDmtcpOptionOn) {
|
||||
// 2 columns and 5 rows, each component has the same width and height.
|
||||
gridLayout = new GridLayout(5,2,2,4);
|
||||
} else {
|
||||
// 2 columns and 6 rows
|
||||
gridLayout = new GridLayout(6,2,2,4);
|
||||
}
|
||||
// 2 columns and 5 rows, each component has the same width and height.
|
||||
gridLayout = new GridLayout(5,2,2,4);
|
||||
|
||||
commandsPanel.setLayout(gridLayout);
|
||||
|
||||
@ -1052,12 +1023,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
|
||||
commandsPanel.add(liteButton);
|
||||
|
||||
if (isDmtcpOptionOn) {
|
||||
commandsPanel.add(new JButton(getAction("dumpDMTCPChkpnt")));
|
||||
dumpChkpntASCIIButton.setText("Dump ASCII Chkpnt");
|
||||
loadChkpntButton.setText("Load ASCII Chkpnt");
|
||||
}
|
||||
|
||||
commandsPanel.add(new JButton(getAction("quit")));
|
||||
|
||||
return titledCommandsPanel;
|
||||
@ -1231,18 +1196,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
return statusBar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the process id of DMTCP
|
||||
*/
|
||||
public void isDmtcpRunning() {
|
||||
|
||||
if (isDmtcpOptionOn) {
|
||||
setActionsEnabled("dumpDMTCPChkpnt",true);
|
||||
} else {
|
||||
setActionsEnabled("dumpDMTCPChkpnt",false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the GUI as needed if SIM states are changed.
|
||||
*/
|
||||
@ -1293,7 +1246,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
} else {
|
||||
enableAllCommands();
|
||||
setActionsEnabled("freezeSim,quit", false);
|
||||
isDmtcpRunning(); /* if DMTCP is not running, disable button on control panel */
|
||||
}
|
||||
logoImagePanel.pause();
|
||||
break;
|
||||
@ -1303,7 +1255,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
disableAllCommands();
|
||||
setActionsEnabled("freezeSim,lite", true);
|
||||
if (debug_flag != 0) {
|
||||
setActionsEnabled("stepSim,dumpDMTCPChkpnt", true);
|
||||
setActionsEnabled("stepSim,dumpChkpntASCII", true);
|
||||
}
|
||||
logoImagePanel.resume();
|
||||
@ -1349,7 +1300,7 @@ public class SimControlApplication extends TrickApplication implements PropertyC
|
||||
ArrayList<String> actions = new ArrayList<String>();
|
||||
|
||||
actions.add("stepSim,recordingSim,startSim,realtime,freezeSim," +
|
||||
"dumpDMTCPChkpnt,dumpChkpntASCII,shutdownSim,loadChkpnt,lite,quit");
|
||||
"dumpChkpntASCII,shutdownSim,loadChkpnt,lite,quit");
|
||||
return actions.toArray(new String[0]);
|
||||
}
|
||||
|
||||
|
@ -349,32 +349,6 @@ public class SimControlActionController {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoked when the user presses Dump DMTCP Chkpnt button.
|
||||
* @param dir directory to dump checkpoint
|
||||
* @param fileName name of checkpoint file
|
||||
* @param dialogParent dialogParent
|
||||
*/
|
||||
public void handleDumpDMTCPChkpnt(String dir, String fileName, Component dialogParent) {
|
||||
|
||||
File selectedFile = UIUtils.chooseSaveFile(dir, fileName, null, dialogParent);
|
||||
|
||||
if (selectedFile != null) {
|
||||
|
||||
String parentPath = selectedFile.getParent();
|
||||
|
||||
if ( !dir.equals(parentPath) ) {
|
||||
|
||||
System.out.println("Path was not updated! DMTCP checkpoints are saved in the --chkpt directory specified in s_main_dmtcp." );
|
||||
}
|
||||
try {
|
||||
|
||||
simcom.put("trick.dmtcp_checkpoint(\"" + selectedFile.getName() + "\")\n");
|
||||
|
||||
} catch (IOException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoked when the user presses Shutdown button.
|
||||
*/
|
||||
|
@ -56,8 +56,6 @@ realtime.Action.shortDescription = Realtime on/off
|
||||
freezeSim.Action.text = Freeze
|
||||
freezeSim.Action.shortDescription = Freeze/Stop the simulation
|
||||
|
||||
dumpDMTCPChkpnt.Action.text = Dump DMTCP Chkpnt
|
||||
dumpDMTCPChkpnt.Action.shortDescription = Dump DMTCP Checkpoint
|
||||
|
||||
shutdownSim.Action.text = Shutdown
|
||||
shutdownSim.Action.shortDescription = Shutdown Simulation
|
||||
|
@ -17,8 +17,6 @@ set( SS_SRC
|
||||
Collect/collect
|
||||
CommandLineArguments/CommandLineArguments
|
||||
CommandLineArguments/command_line_c_intf
|
||||
DMTCP/DMTCP
|
||||
DMTCP/dmtcp_checkpoint_c_intf
|
||||
DataRecord/DRAscii
|
||||
DataRecord/DRBinary
|
||||
DataRecord/DRHDF5
|
||||
|
@ -9,11 +9,6 @@
|
||||
#include <sys/stat.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _DMTCP
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#include "trick/DMTCP.hh"
|
||||
#include "trick/CheckPointRestart.hh"
|
||||
#include "trick/MemoryManager.hh"
|
||||
#include "trick/SimObject.hh"
|
||||
@ -77,11 +72,6 @@ int Trick::CheckPointRestart::set_safestore_enabled(bool yes_no) {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::CheckPointRestart::dmtcp_set_safestore_enabled(bool yes_no) {
|
||||
dmtcp_safestore_enabled = yes_no ;
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::CheckPointRestart::set_cpu_num(int in_cpu_num) {
|
||||
if ( in_cpu_num <= 0 ) {
|
||||
cpu_num = -1 ;
|
||||
@ -159,25 +149,6 @@ int Trick::CheckPointRestart::set_safestore_time(double in_time) {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::CheckPointRestart::dmtcp_set_safestore_time(double in_time) {
|
||||
|
||||
long long software_frame_tics ;
|
||||
|
||||
if ( in_time < 10 ) {
|
||||
std::cout << "\nA DMTCP Safestore Interval less than 10 seconds is not recommended.\n\n";
|
||||
}
|
||||
|
||||
dmtcp_safestore_period = (long long)(in_time * exec_get_time_tic_value()) ;
|
||||
software_frame_tics = exec_get_software_frame_tics() ;
|
||||
|
||||
if ( dmtcp_safestore_period % software_frame_tics ) {
|
||||
dmtcp_safestore_period = ((dmtcp_safestore_time / software_frame_tics) + 1 ) * software_frame_tics ;
|
||||
}
|
||||
dmtcp_safestore_time = dmtcp_safestore_period ;
|
||||
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::CheckPointRestart::checkpoint(std::string file_name, bool print_status, std::string obj_list_str ) {
|
||||
|
||||
// first, empty the sim obj list to make sure there is nothing left from last time
|
||||
@ -286,82 +257,6 @@ int Trick::CheckPointRestart::write_checkpoint() {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
/**
 @details
 -# When no name is supplied, build a default: "dmtcp_chkpnt_init" while the
    sim mode is Initialization, otherwise "dmtcp_chkpnt_" followed by the sim
    time with two decimals.
 -# Hand the name to dmtcp_job_queue().
 The body is compiled only when _DMTCP is defined.
 @param file_name - requested checkpoint name, may be empty
*/
void Trick::CheckPointRestart::setDMTCPFilename( std::string file_name __attribute__((unused))) {
#ifdef _DMTCP
    if ( file_name.empty() ) {
        std::stringstream default_name ;

        if ( exec_get_mode() == Initialization ) {
            default_name << "dmtcp_chkpnt_init" ;
        } else {
            default_name << "dmtcp_chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
        }

        file_name = default_name.str() ;
    }

    dmtcp_job_queue( file_name ) ;
#endif
}
|
||||
|
||||
/**
 @details
 -# Queue the checkpoint name and raise the checkpoint-now flag.
 -# Trigger the checkpoint immediately unless the sim was in Initialization
    or Freeze when this function was entered.
 The body is compiled only when _DMTCP is defined.
 @param file_name - requested checkpoint name, may be empty
 @return always 0
*/
int Trick::CheckPointRestart::dmtcp_checkpoint( std::string file_name __attribute__((unused))) {
#ifdef _DMTCP
    const SIM_MODE mode_at_entry = exec_get_mode() ;

    setDMTCPFilename( file_name ) ;
    dmtcp_set_checkpoint_now() ;

    // Workaround recorded by the original author (DANNY) for a suspected
    // DMTCP bug: a checkpoint requested from the sim control panel in freeze
    // arrives through the variable server's parse() call, and IPParse holds
    // ip_mutex around that call. Checkpointing right here could capture
    // ip_mutex in the locked state, and the restarted sim then hangs because
    // it cannot unlock it. So in freeze the call is left to the dmtcp freeze
    // job instead. The author notes this is not airtight, since the variable
    // server runs in its own thread.
    const bool leave_to_job = (mode_at_entry == Initialization) || (mode_at_entry == Freeze) ;
    if ( !leave_to_job ) {
        call_dmtcp() ;
    }
#endif
    return 0 ;
}
|
||||
|
||||
/**
 @details
 -# Convert the requested time to tics.
 -# Record it in dmtcp_checkpoint_times unless it is earlier than the
    current time.
 The body is compiled only when _DMTCP is defined.
 @param in_time - time at which a DMTCP checkpoint is requested
 @return always 0
*/
int Trick::CheckPointRestart::dmtcp_checkpoint( double in_time __attribute__((unused))) {
#ifdef _DMTCP
    const long long now_tics = exec_get_time_tics() ;
    const long long requested_tics = (long long)(in_time * exec_get_time_tic_value()) ;

    // Requests for a time already in the past are dropped without notice.
    if ( requested_tics < now_tics ) {
        return 0 ;
    }

    dmtcp_checkpoint_times.push( requested_tics ) ;
#endif
    return 0 ;
}
|
||||
|
||||
/**
 @details
 -# If the top of dmtcp_checkpoint_times equals the current time, request a
    checkpoint and remove every entry at the top that equals the current time.
 -# If safestore is enabled and due now, request the
    "dmtcp_chkpnt_safestore" checkpoint and schedule the next one a period later.
 The body is compiled only when _DMTCP is defined.
 @return always 0
*/
int Trick::CheckPointRestart::write_dmtcp_checkpoint() {
#ifdef _DMTCP
    const long long now_tics = exec_get_time_tics() ;

    const bool timed_checkpoint_due =
        !dmtcp_checkpoint_times.empty() && (now_tics == dmtcp_checkpoint_times.top()) ;

    if ( timed_checkpoint_due ) {
        dmtcp_checkpoint() ;

        // Several requests may share this time; one checkpoint serves them all.
        while ( !dmtcp_checkpoint_times.empty() && (dmtcp_checkpoint_times.top() == now_tics) ) {
            dmtcp_checkpoint_times.pop() ;
        }
    }

    if ( dmtcp_safestore_enabled && (now_tics == dmtcp_safestore_time) ) {
        dmtcp_checkpoint("dmtcp_chkpnt_safestore") ;
        dmtcp_safestore_time += dmtcp_safestore_period ;
    }
#endif
    return 0 ;
}
|
||||
|
||||
int Trick::CheckPointRestart::write_pre_init_checkpoint() {
|
||||
if ( pre_init_checkpoint ) {
|
||||
checkpoint(std::string("chkpnt_pre_init")) ;
|
||||
|
@ -70,15 +70,6 @@ extern "C" int checkpoint_safestore( int yes_no ) {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::CheckPointRestart
|
||||
* @copydoc Trick::CheckPointRestart::dmtcp_set_safestore_enabled
|
||||
*/
|
||||
extern "C" int dmtcp_checkpoint_safestore( int yes_no ) {
|
||||
the_cpr->dmtcp_set_safestore_enabled(bool(yes_no)) ;
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::CheckPointRestart
|
||||
* @copydoc Trick::CheckPointRestart::set_safestore_time
|
||||
@ -88,15 +79,6 @@ extern "C" int checkpoint_safestore_period( double in_time ) {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::CheckPointRestart
|
||||
* @copydoc Trick::CheckPointRestart::dmtcp_set_safestore_time
|
||||
*/
|
||||
extern "C" int dmtcp_checkpoint_safestore_period( double in_time ) {
|
||||
the_cpr->dmtcp_set_safestore_time(in_time) ;
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::CheckPointRestart
|
||||
* @copydoc Trick::CheckPointRestart::set_cpu_num
|
||||
@ -130,21 +112,6 @@ extern "C" const char * checkpoint_get_load_file() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @relates Trick::CheckPointRestart
|
||||
@brief @userdesc Command to dump a checkpoint now to the specified file.
|
||||
This is invoked when the user clicks the "Dump DMTCP Chkpnt" button on the sim control panel.
|
||||
@par Python Usage:
|
||||
@code trick.dmtcp_checkpoint("<file_name>") @endcode
|
||||
@param file_name - name of checkpoint file to dump (leave blank and Trick will use filename "dmtcp_chkpnt_<simtime>"
|
||||
@return always 0
|
||||
*/
|
||||
extern "C" int dmtcp_checkpoint( const char * file_name __attribute__((unused))) {
|
||||
the_cpr->dmtcp_checkpoint( std::string(file_name) );
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::CheckPointRestart
|
||||
@brief @userdesc Command to dump a checkpoint now to the specified file.
|
||||
|
@ -2,6 +2,3 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefi
|
||||
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
|
||||
-include Makefile_deps
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
|
||||
endif
|
||||
|
@ -20,8 +20,6 @@ object_${TRICK_HOST_CPU}/next_attr_name.o: next_attr_name.cpp \
|
||||
${TRICK_HOME}/include/trick/CheckPointRestart_c_intf.hh
|
||||
object_${TRICK_HOST_CPU}/stl_type_name_convert.o: stl_type_name_convert.cpp
|
||||
object_${TRICK_HOST_CPU}/CheckPointRestart.o: CheckPointRestart.cpp \
|
||||
${TRICK_HOME}/include/trick/DMTCP.hh \
|
||||
${TRICK_HOME}/include/trick/dmtcp_checkpoint_c_intf.hh \
|
||||
${TRICK_HOME}/include/trick/Scheduler.hh \
|
||||
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
|
||||
${TRICK_HOME}/include/trick/JobData.hh \
|
||||
|
@ -2,6 +2,3 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefi
|
||||
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
|
||||
-include Makefile_deps
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
|
||||
endif
|
||||
|
@ -1,333 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <libgen.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <dlfcn.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#ifdef _DMTCP
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#include "trick/DMTCP.hh"
|
||||
#include "trick/SimObject.hh"
|
||||
#include "trick/exec_proto.h"
|
||||
#include "trick/message_proto.h"
|
||||
#include "trick/Executive.hh"
|
||||
#include "trick/CheckPointRestart.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
Trick::DMTCP * the_dmtcp ;
|
||||
|
||||
/**
 Clears the checkpoint flags, registers the job classes "dmtcp_pre",
 "dmtcp_post" and "dmtcp_restart" with their queues, and publishes this
 instance through the global the_dmtcp pointer.
*/
Trick::DMTCP::DMTCP() {

    checkpoint_now = false ;
    dmtcp_ckpt_complete = 0 ;

    // Class ids are assigned in registration order: 0, 1, 2.
    int next_class_id = 0 ;

    class_map["dmtcp_pre"] = next_class_id ;
    class_to_queue[next_class_id] = &pre_queue ;
    ++next_class_id ;

    class_map["dmtcp_post"] = next_class_id ;
    class_to_queue[next_class_id] = &post_queue ;
    ++next_class_id ;

    class_map["dmtcp_restart"] = next_class_id ;
    class_to_queue[next_class_id] = &restart_queue ;
    ++next_class_id ;

    the_dmtcp = this ;
}
|
||||
|
||||
int Trick::DMTCP::init() {
|
||||
|
||||
#ifdef _DMTCP
|
||||
if( dmtcpIsEnabled() )
|
||||
dmtcpInstallHooks(dmtcp_pre_checkpoint, dmtcp_post_checkpoint, dmtcp_restart);
|
||||
#endif
|
||||
|
||||
call_dmtcp();
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
|
||||
int Trick::DMTCP::freeze() {
|
||||
call_dmtcp();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int Trick::DMTCP::pre() {
|
||||
JobData * curr_job ;
|
||||
|
||||
pre_queue.reset_curr_index() ;
|
||||
|
||||
while ( (curr_job = pre_queue.get_next_job()) != NULL ) {
|
||||
curr_job->call() ;
|
||||
}
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
int Trick::DMTCP::post() {
|
||||
JobData * curr_job ;
|
||||
|
||||
renameRestartScript();
|
||||
|
||||
post_queue.reset_curr_index() ;
|
||||
|
||||
while ( (curr_job = post_queue.get_next_job()) != NULL )
|
||||
curr_job->call() ;
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
int Trick::DMTCP::restart() {
|
||||
JobData * curr_job ;
|
||||
|
||||
restart_queue.reset_curr_index() ;
|
||||
|
||||
while ( (curr_job = restart_queue.get_next_job()) != NULL )
|
||||
curr_job->call() ;
|
||||
|
||||
while ( !dmtcp_checkpoint_jobs_queue.empty())
|
||||
dmtcp_checkpoint_jobs_queue.pop();
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
|
||||
int Trick::DMTCP::call_dmtcp() {
|
||||
|
||||
if ( checkpoint_now == true ) {
|
||||
|
||||
dmtcp_ckpt_complete = 0;
|
||||
sleep(1); // this sleep() is required to give flag
|
||||
// enough time to refresh in Trick View
|
||||
// it does not cause overruns
|
||||
|
||||
#ifdef _DMTCP
|
||||
if( dmtcpIsEnabled() )
|
||||
dmtcpRunCommand('c') ;
|
||||
#endif
|
||||
checkpoint_now = false ;
|
||||
}
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
void Trick::DMTCP::dmtcpSetCheckpointNow() {
|
||||
checkpoint_now = true ;
|
||||
}
|
||||
|
||||
/**
 Returns the part of str after the last '/' or '\\'.
 @param str - path or bare file name
 @return the trailing file-name component; the whole string when it contains
         no separator
*/
std::string Trick::DMTCP::splitFilename( const string& str ) {
    // With no separator find_last_of() yields npos, and npos + 1 wraps to 0,
    // so substr() returns the entire string.
    const size_t last_separator = str.find_last_of("/\\") ;
    return str.substr(last_separator + 1) ;
}
|
||||
|
||||
bool Trick::DMTCP::isSpecialCharacter( const std::string& str) {
|
||||
|
||||
size_t found = str.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_.-#%@=:^" );
|
||||
|
||||
if (found!=string::npos)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 Adds file_name to the pending checkpoint names. Unless the name is
 "dmtcp_chkpnt_safestore", one entry is then popped whenever the queue holds
 two or more.
 @param file_name - checkpoint name to queue
*/
void Trick::DMTCP::dmtcpJobQueue( std::string file_name ) {
    dmtcp_checkpoint_jobs_queue.push( file_name ) ;

    const bool is_safestore = ( strcmp(file_name.c_str(), "dmtcp_chkpnt_safestore") == 0 ) ;

    if ( !is_safestore && (dmtcp_checkpoint_jobs_queue.size() >= 2) ) {
        dmtcp_checkpoint_jobs_queue.pop() ;
    }
}
|
||||
|
||||
|
||||
/**
 @details
 Chooses the name the DMTCP restart script will be renamed to.
 -# Build the default name "dmtcp_chkpnt_<simtime>" (two decimals).
 -# If no name is queued, use the default.
 -# Otherwise take the queued name; if it contains a '/', strip the directory
    part and print a notice.
 -# Accept the name if it equals the default, or is non-empty and made only
    of allowed characters; otherwise print a notice and use the default.
 Without _DMTCP the function returns an empty string.
 @return the restart script name
*/
std::string Trick::DMTCP::getScriptName() {
    std::string restart_script_name ;
#ifdef _DMTCP
    std::stringstream default_script_name ;

    // Default script name
    default_script_name << "dmtcp_chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;

    // top() on an empty container is undefined; dmtcpCleanup() already treats
    // an empty queue as possible, so fall back to the default name here too.
    if ( dmtcp_checkpoint_jobs_queue.empty() ) {
        return default_script_name.str() ;
    }

    // Script name from memory manager wrapper (input file or gui)
    std::string mmw_script_name = dmtcp_checkpoint_jobs_queue.top() ;

    // User specified a new directory
    if ( mmw_script_name.find('/') != std::string::npos ) {
        std::string stripped_name = splitFilename( mmw_script_name ) ;
        std::cout << "DMTCP checkpoints are written to the --ckptdir directory (defined in $TRICK_HOME/bin/s_define_dmtcp).\n";
        mmw_script_name = stripped_name ;
    }

    // A name ending in '/' strips down to an empty string; treat that as
    // invalid rather than using an empty script name.
    const bool name_is_valid = !mmw_script_name.empty() && !isSpecialCharacter( mmw_script_name ) ;

    if ( ( mmw_script_name == default_script_name.str() ) || name_is_valid ) {
        // User specified a valid script name (or is using default script name)
        restart_script_name = mmw_script_name ;
    } else {
        // User specified an invalid script name
        std::cout << mmw_script_name.c_str() << " is not a valid name. The default DMTCP script name will be used: " << default_script_name.str() << endl;
        restart_script_name = default_script_name.str() ;
    }
#endif
    return restart_script_name ;
}
|
||||
|
||||
/**
 @details
 Renames the restart script written by DMTCP to the name chosen by
 getScriptName().
 -# Build the expected script path under lsp->env and under the current
    directory, with or without the generation id suffix.
 -# Move whichever of the two exists to <lsp->env>/<script name>.
 -# Run dmtcpSafestoreDir().
 -# Wait until the checkpoint's ".dmtcp.temp" file is gone, then set the
    completion flag and publish the "Dumped" message.
 The body is compiled only when _DMTCP is defined.
*/
void Trick::DMTCP::dmtcpRenameCmd() {
#ifdef _DMTCP
    std::stringstream dmtcp_rename_command;
    std::stringstream no_env_dmtcp_rename_command;
    std::stringstream trick_restart_script;
    std::stringstream dmtcp_restart_script;
    std::stringstream no_env_dmtcp_restart_script;
    std::string mmw_filename = getScriptName();

    const DmtcpLocalStatus * lsp = dmtcpGetLocalStatus() ;

    if (lsp->genId) {
        dmtcp_restart_script << lsp->env << "/dmtcp_restart_script_" << lsp->uniquePidStr << "_" << std::setfill('0') << std::setw(5) << lsp->genId << ".sh";
        no_env_dmtcp_restart_script << "./dmtcp_restart_script_" << lsp->uniquePidStr << "_" << std::setfill('0') << std::setw(5) << lsp->genId << ".sh";
    }
    else {
        dmtcp_restart_script << lsp->env << "/dmtcp_restart_script_" << lsp->uniquePidStr << ".sh";
        no_env_dmtcp_restart_script << "./dmtcp_restart_script_" << lsp->uniquePidStr << ".sh";
    }

    trick_restart_script << lsp->env << "/" << mmw_filename;
    dmtcp_rename_command << "mv " << dmtcp_restart_script.str() << " " << trick_restart_script.str();
    no_env_dmtcp_rename_command << "mv " << no_env_dmtcp_restart_script.str() << " " << trick_restart_script.str() << "; rm -f ./dmtcp_restart_script.sh";

    ifstream dmtcp_file( dmtcp_restart_script.str().c_str() );
    if ( dmtcp_file.good() ) {
        dmtcpSystemCmd( dmtcp_rename_command.str() );
    }

    // There is a bug in DMTCP that causes the DMTCP Coordinator (see dmtcp::DmtcpCoordinator::writeRestartScript())
    // to not recognize ENV_VAR_CHECKPOINT_DIR when dumping a checkpoint from a restarted checkpoint.
    // As a result, DMTCP writes the restart script to the current directory.
    // This code just checks to see if it's in the current dir, if so, move it to the ENV_VAR_CHECKPOINT_DIR with the new script name.
    ifstream no_env_dmtcp_file( no_env_dmtcp_restart_script.str().c_str() );
    if ( no_env_dmtcp_file.good() ) {
        dmtcpSystemCmd( no_env_dmtcp_rename_command.str() );
    }

    dmtcpSafestoreDir();

    // Don't print out "dumped" message until checkpoint file (ckpt_*.dmtcp) is written
    // getenv() returns NULL when TRICK_HOST_CPU is unset, and streaming a null
    // char pointer is undefined behavior, so substitute an empty string.
    const char * host_cpu = getenv("TRICK_HOST_CPU") ;
    std::stringstream dmtcp_temp;
    dmtcp_temp << lsp->env << "/ckpt_" << lsp->uniquePidStr << "_" << std::setfill('0') << std::setw(5) << lsp->genId << "/ckpt_S_main_" << ((host_cpu != NULL) ? host_cpu : "") << ".exe_" << lsp->uniquePidStr << ".dmtcp.temp";

    // Build the path once and yield briefly between polls instead of
    // spinning a core while the checkpoint is being written.
    const std::string temp_path = dmtcp_temp.str() ;
    while( access( temp_path.c_str(), F_OK ) == 0 ) {
        usleep(1000) ;
    }

    dmtcp_ckpt_complete = 1;

    message_publish(MSG_INFO, "Dumped DMTCP Checkpoint: %s\n", mmw_filename.c_str()) ;

#endif
}
|
||||
|
||||
void Trick::DMTCP::dmtcpSystemCmd( const string& str ) {
|
||||
std::string real_system_name ;
|
||||
|
||||
void* dlhandle ;
|
||||
void (*real_system_ptr)(const char *) = NULL ;
|
||||
|
||||
dlhandle = dlopen( NULL, RTLD_LAZY) ;
|
||||
real_system_name = "_real_system" ;
|
||||
real_system_ptr = (void (*)(const char *))dlsym( dlhandle , real_system_name.c_str()) ;
|
||||
|
||||
if ( real_system_ptr != NULL )
|
||||
(*real_system_ptr)( str.c_str() ) ;
|
||||
else
|
||||
system( str.c_str() );
|
||||
}
|
||||
|
||||
/**
 @details
 Intended to keep only the newest ckpt_* directory for DMTCP safestore
 checkpoints: when the checkpoint directory changes, remove the previous one
 and remember the new one. Without _DMTCP it only clears
 prev_checkpoint_dirname.
*/
void Trick::DMTCP::dmtcpSafestoreDir() {
#ifdef _DMTCP

    std::string restart_script_name;
    const DmtcpLocalStatus * lsp = dmtcpGetLocalStatus() ;

    // For DMTCP Safestore checkpoints, only keep newest ckpt_* directory
    //
    // NOTE(review): restart_script_name is never assigned in this function,
    // so this branch is not taken as written. The earlier test compared the
    // c_str() pointer to a string literal with ==, which compares addresses
    // and was likewise never true. Confirm where the name should come from
    // before enabling this path, since it runs "rm -rf".
    if ( restart_script_name == "dmtcp_chkpnt_safestore" ) {

        // dirname() may modify its argument, so work on a private copy.
        char * long_checkpoint_name = strdup(lsp->checkpointFilename) ;

        if ( long_checkpoint_name != NULL ) {
            char * checkpoint_dirname = dirname(long_checkpoint_name) ;

            // If there's a new ckpt_* directory, remove the previous version
            if ( strcmp(prev_checkpoint_dirname, checkpoint_dirname) ) {
                char rm_safestore_dir[512] ;
                int len = snprintf( rm_safestore_dir, sizeof(rm_safestore_dir),
                                    "cd %s; rm -rf %s", lsp->env, prev_checkpoint_dirname) ;

                // Never run a truncated rm command.
                if ( (len > 0) && ((size_t)len < sizeof(rm_safestore_dir)) ) {
                    dmtcpSystemCmd( rm_safestore_dir );
                }
            }

            // NOTE(review): the size of prev_checkpoint_dirname is not visible
            // here; confirm it can hold the directory name.
            strcpy( prev_checkpoint_dirname, checkpoint_dirname );

            // Release the copy only after its last use above.
            free( long_checkpoint_name ) ;
        }
    }
#else
    strcpy( prev_checkpoint_dirname, "" );
#endif
}
|
||||
|
||||
|
||||
/**
 @details
 -# Delete <lsp->env>/dmtcp_restart_script.sh.
 -# Pop one entry from the pending checkpoint names, if any.
 The body is compiled only when _DMTCP is defined.
*/
void Trick::DMTCP::dmtcpCleanup() {
#ifdef _DMTCP
    char dmtcp_restart_script_sh[1024];
    const DmtcpLocalStatus * lsp = dmtcpGetLocalStatus() ;

    // Bounded formatting: a long checkpoint directory can no longer overrun
    // the buffer, and a truncated path is never passed to unlink().
    int len = snprintf( dmtcp_restart_script_sh, sizeof(dmtcp_restart_script_sh),
                        "%s/dmtcp_restart_script.sh", lsp->env );
    if ( (len > 0) && ((size_t)len < sizeof(dmtcp_restart_script_sh)) ) {
        unlink(dmtcp_restart_script_sh) ;
    }

    if ( !dmtcp_checkpoint_jobs_queue.empty() ) {
        dmtcp_checkpoint_jobs_queue.pop();
    }
#endif
}
|
||||
|
||||
/**
 When built with _DMTCP and dmtcpIsEnabled() is true, renames the restart
 script and then runs the cleanup step. Otherwise does nothing.
*/
void Trick::DMTCP::renameRestartScript() {
#ifdef _DMTCP
    if ( !dmtcpIsEnabled() ) {
        return ;
    }

    dmtcpRenameCmd() ;
    dmtcpCleanup() ;
#endif
}
|
||||
|
||||
|
||||
/**
 Writes the DMTCP section header followed by the pre, post and restart job
 queues to fp. Does nothing when fp is NULL.
 @param fp - open output stream, or NULL
 @return always 0
*/
int Trick::DMTCP::write_s_job_execution( FILE * fp ) {

    if ( fp != NULL ) {
        fputs("\n===================================================================================================\n", fp) ;
        fputs("DMTCP :\n\n", fp) ;

        write_non_sched_queue(fp, &pre_queue) ;
        write_non_sched_queue(fp, &post_queue) ;
        write_non_sched_queue(fp, &restart_queue) ;
    }

    return 0 ;
}
|
||||
|
||||
int Trick::DMTCP::instrument_job_before(Trick::JobData * instrument_job __attribute__((unused)) ) {
|
||||
return 0 ;
|
||||
}
|
||||
int Trick::DMTCP::instrument_job_after(Trick::JobData * instrument_job __attribute__((unused)) ) {
|
||||
return 0 ;
|
||||
}
|
||||
int Trick::DMTCP::instrument_job_remove(std::string in_job __attribute__((unused)) ) {
|
||||
return 0 ;
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefile.common
|
||||
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
|
||||
-include Makefile_deps
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
|
||||
endif
|
@ -1,27 +0,0 @@
|
||||
object_${TRICK_HOST_CPU}/DMTCP.o: DMTCP.cpp ${TRICK_HOME}/include/trick/DMTCP.hh \
|
||||
${TRICK_HOME}/include/trick/dmtcp_checkpoint_c_intf.hh \
|
||||
${TRICK_HOME}/include/trick/Scheduler.hh \
|
||||
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
|
||||
${TRICK_HOME}/include/trick/JobData.hh \
|
||||
${TRICK_HOME}/include/trick/InstrumentBase.hh \
|
||||
${TRICK_HOME}/include/trick/SimObject.hh \
|
||||
${TRICK_HOME}/include/trick/exec_proto.h \
|
||||
${TRICK_HOME}/include/trick/sim_mode.h \
|
||||
${TRICK_HOME}/include/trick/message_proto.h \
|
||||
${TRICK_HOME}/include/trick/message_type.h \
|
||||
${TRICK_HOME}/include/trick/Executive.hh \
|
||||
${TRICK_HOME}/include/trick/Scheduler.hh \
|
||||
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
|
||||
${TRICK_HOME}/include/trick/SimObject.hh \
|
||||
${TRICK_HOME}/include/trick/Threads.hh \
|
||||
${TRICK_HOME}/include/trick/ThreadBase.hh \
|
||||
${TRICK_HOME}/include/trick/sim_mode.h \
|
||||
${TRICK_HOME}/include/trick/CheckPointRestart.hh
|
||||
object_${TRICK_HOST_CPU}/dmtcp_checkpoint_c_intf.o: dmtcp_checkpoint_c_intf.cpp \
|
||||
${TRICK_HOME}/include/trick/DMTCP.hh \
|
||||
${TRICK_HOME}/include/trick/dmtcp_checkpoint_c_intf.hh \
|
||||
${TRICK_HOME}/include/trick/Scheduler.hh \
|
||||
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
|
||||
${TRICK_HOME}/include/trick/JobData.hh \
|
||||
${TRICK_HOME}/include/trick/InstrumentBase.hh \
|
||||
${TRICK_HOME}/include/trick/SimObject.hh
|
@ -1,69 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#ifdef _DMTCP
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#include "trick/DMTCP.hh"
|
||||
|
||||
/** Global singleton pointer to DMTCP */
|
||||
extern Trick::DMTCP * the_dmtcp ;
|
||||
|
||||
#ifdef _DMTCP
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::dmtcpIsEnabled()
|
||||
*/
|
||||
extern "C" int dmtcp_is_enabled() {
|
||||
return dmtcpIsEnabled() ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::dmtcp_pre_checkpoint()
|
||||
*/
|
||||
extern "C" void dmtcp_pre_checkpoint() {
|
||||
the_dmtcp->pre() ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::dmtcp_post_checkpoint()
|
||||
*/
|
||||
extern "C" void dmtcp_post_checkpoint() {
|
||||
the_dmtcp->post() ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::restart()
|
||||
*/
|
||||
extern "C" void dmtcp_restart() {
|
||||
the_dmtcp->restart() ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::call_dmtcp()
|
||||
*/
|
||||
extern "C" int call_dmtcp() {
|
||||
the_dmtcp->call_dmtcp();
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::dmtcpSetCheckpointNow()
|
||||
*/
|
||||
extern "C" void dmtcp_set_checkpoint_now() {
|
||||
the_dmtcp->dmtcpSetCheckpointNow() ;
|
||||
}
|
||||
|
||||
/**
|
||||
* @relates Trick::DMTCP
|
||||
* @copydoc Trick::DMTCP::dmtcpJobQueue()
|
||||
*/
|
||||
extern "C" void dmtcp_job_queue( std::string file_name ) {
|
||||
the_dmtcp->dmtcpJobQueue( file_name );
|
||||
}
|
||||
#endif
|
@ -15,11 +15,6 @@
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
|
||||
#if _DMTCP
|
||||
#include <dlfcn.h>
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#include "trick/DataRecordDispatcher.hh"
|
||||
#include "trick/exec_proto.h"
|
||||
#include "trick/exec_proto.hh"
|
||||
@ -80,27 +75,7 @@ int Trick::DataRecordDispatcher::remove_files() {
|
||||
|
||||
std::string command;
|
||||
command = std::string("/bin/rm -rf ") + command_line_args_get_output_dir() + std::string("/log_*") ;
|
||||
#ifdef _DMTCP
|
||||
if( dmtcpIsEnabled() ) {
|
||||
std::string real_system_name ;
|
||||
void* dlhandle ;
|
||||
void (*real_system_ptr)(const char *) = NULL ;
|
||||
dlhandle = dlopen( NULL, RTLD_LAZY) ;
|
||||
real_system_name = "_real_system" ;
|
||||
real_system_ptr = (void (*)(const char *))dlsym( dlhandle , real_system_name.c_str()) ;
|
||||
if ( real_system_ptr != NULL ) {
|
||||
printf("\nDataRecordDispatcher::remove_files() calling DMTCP _real_system \"%s\"\n" , command.c_str()) ;
|
||||
(*real_system_ptr)(command.c_str()) ;
|
||||
} else {
|
||||
system(command.c_str());
|
||||
}
|
||||
dlclose(dlhandle) ;
|
||||
} else {
|
||||
system(command.c_str());
|
||||
}
|
||||
#else
|
||||
system(command.c_str());
|
||||
#endif
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
@ -254,19 +229,6 @@ int Trick::DataRecordDispatcher::restart() {
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
/**
|
||||
@details
|
||||
-# Call the restart job for all of the groups.
|
||||
*/
|
||||
int Trick::DataRecordDispatcher::dmtcp_restart() {
|
||||
unsigned int ii ;
|
||||
for ( ii = 0 ; ii < groups.size() ; ii++ ) {
|
||||
groups[ii]->dmtcp_restart() ;
|
||||
}
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@details
|
||||
-# If the thread was started,
|
||||
|
@ -107,9 +107,6 @@ Trick::DataRecordGroup::DataRecordGroup( std::string in_name ) :
|
||||
add_job(0, 5, (char *)"restart", NULL, 1.0, (char *)"restart", (char *)"TRK", 60001) ;
|
||||
add_job(0, 6, (char *)"shutdown", NULL, 1.0, (char *)"shutdown", (char *)"TRK") ;
|
||||
|
||||
// (Alex 1/15/14) The dmtcp_restart job is called by the DataRecordDispatcher... is the
|
||||
// dispatcher necessary anymore?
|
||||
|
||||
write_job = add_job(0, 99, (char *)job_class.c_str(), NULL, cycle, (char *)"data_record" , (char *)"TRK") ;
|
||||
|
||||
add_time_variable() ;
|
||||
@ -527,14 +524,6 @@ int Trick::DataRecordGroup::restart() {
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
/**
|
||||
@details
|
||||
-# This restart is targetted at DMTCP restarts where we only need to rewrite the header.
|
||||
*/
|
||||
int Trick::DataRecordGroup::dmtcp_restart() {
|
||||
return write_header() ;
|
||||
}
|
||||
|
||||
int Trick::DataRecordGroup::write_header() {
|
||||
|
||||
unsigned int jj ;
|
||||
@ -546,9 +535,6 @@ int Trick::DataRecordGroup::write_header() {
|
||||
|
||||
out_stream.open(header_name.c_str(), std::fstream::out ) ;
|
||||
if ( ! out_stream || ! out_stream.good() ) {
|
||||
#ifndef _DMTCP
|
||||
message_publish(MSG_ERROR, "Can't open Data Record file %s.\n", header_name.c_str()) ;
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,3 @@ $(OBJ_DIR)/io_DRHDF5.o: TRICK_CXXFLAGS += -I$(HDF5)/include
|
||||
endif
|
||||
TRICK_CXXFLAGS += -DHDF5
|
||||
endif
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
|
||||
endif
|
||||
|
@ -6,10 +6,6 @@
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
|
||||
#if _DMTCP
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
#include "trick/ExternalApplication.hh"
|
||||
#include "trick/ExternalApplicationManager.hh"
|
||||
#include "trick/variable_server_proto.h"
|
||||
@ -170,45 +166,10 @@ void Trick::ExternalApplication::launch() {
|
||||
argv = command_line_args_get_argv() ;
|
||||
|
||||
oss << command << " " << arguments.str() << " " << create_arguments_string() ;
|
||||
if (argc > 2) {
|
||||
for (int i=0;i<argc;i++) {
|
||||
if (!strcmp(argv[i], "dmtcp")) {
|
||||
oss << " -dmtcp" ;
|
||||
break ;
|
||||
}
|
||||
}
|
||||
}
|
||||
oss << " &";
|
||||
|
||||
#ifdef _DMTCP
|
||||
if( dmtcpIsEnabled() ) {
|
||||
std::string real_system_name ;
|
||||
|
||||
void* dlhandle ;
|
||||
void (*real_system_ptr)(const char *) = NULL ;
|
||||
|
||||
dlhandle = dlopen( NULL, RTLD_LAZY) ;
|
||||
|
||||
real_system_name = "_real_system" ;
|
||||
real_system_ptr = (void (*)(const char *))dlsym( dlhandle , real_system_name.c_str()) ;
|
||||
|
||||
|
||||
if ( real_system_ptr != NULL ) {
|
||||
std::cout << "\nExternalApplication::launch() calling DMTCP _real_system \"" << oss.str() << "\n";
|
||||
(*real_system_ptr)(oss.str().c_str()) ;
|
||||
} else {
|
||||
std::cout << "calling DMTCP \"" << oss.str().c_str() << "\"" << std::endl;
|
||||
system(oss.str().c_str());
|
||||
}
|
||||
|
||||
dlclose(dlhandle) ;
|
||||
} else {
|
||||
system(oss.str().c_str());
|
||||
}
|
||||
#else
|
||||
std::cout << oss.str() << std::endl;
|
||||
system(oss.str().c_str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,3 @@
|
||||
include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefile.common
|
||||
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
|
||||
-include Makefile_deps
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
|
||||
endif
|
||||
|
@ -90,7 +90,6 @@ int Trick::MSSharedMem::connect() {
|
||||
tsm_dev.size = sizeof(MSSharedMemData);
|
||||
ret = tsm_init(&tsm_dev);
|
||||
} else {
|
||||
// handle reconnecting for dmtcp restart
|
||||
ret = tsm_reconnect(&tsm_dev);
|
||||
}
|
||||
shm_addr = (MSSharedMemData*) tsm_dev.addr;
|
||||
|
@ -48,15 +48,7 @@ std::string Trick::MSSocket::add_sim_args( std::string slave_type ) {
|
||||
/** @li create a unique identifier based on the <machine>_<current pid>. */
|
||||
gethostname(master_host, (size_t) 80);
|
||||
|
||||
/** @li if master is running with dmtcp slave or vice versa, use "_dmtcp_multiconnect_tag"
|
||||
for sync_port_tag. on restart, dmtcp will retain original slave pid, which will not
|
||||
match restarted master pid. sync_port_tag must match for master and slave(s) to
|
||||
connect/reconnect via tc_multiconnect() */
|
||||
|
||||
if (slave_type == "dmtcp")
|
||||
temp_stream << master_host << "_dmtcp_multiconnect_tag" ;
|
||||
else
|
||||
temp_stream << master_host << "_" << getpid() ;
|
||||
temp_stream << master_host << "_" << getpid() ;
|
||||
|
||||
sync_port_tag = temp_stream.str() ;
|
||||
|
||||
|
@ -1,7 +1,3 @@
|
||||
include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefile.common
|
||||
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
|
||||
-include Makefile_deps
|
||||
|
||||
ifneq ($(DMTCP),)
|
||||
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
|
||||
endif
|
||||
|
@ -47,7 +47,6 @@ Trick::SlaveInfo::SlaveInfo() {
|
||||
reconnect_count = 0;
|
||||
chkpnt_dump_auto = true ;
|
||||
chkpnt_load_auto = true ;
|
||||
chkpnt_binary = false ;
|
||||
}
|
||||
|
||||
int Trick::SlaveInfo::set_connection_type(Trick::MSConnect * in_connection) {
|
||||
@ -145,10 +144,6 @@ int Trick::SlaveInfo::start() {
|
||||
if ( ! run_input_file.empty() ) {
|
||||
|
||||
startup_command << " " << run_input_file ;
|
||||
|
||||
/** @li check to see if master is running with dmtcp slave */
|
||||
if (run_input_file.find("dmtcp") != std::string::npos)
|
||||
slave_type = "dmtcp";
|
||||
}
|
||||
|
||||
/** @li Add the connection specific arguments to the startup command */
|
||||
@ -234,15 +229,11 @@ int Trick::SlaveInfo::read_slave_status() {
|
||||
}
|
||||
else {
|
||||
message_publish(MSG_WARNING, "Slave is exiting.\n") ;
|
||||
// if reconnect_wait_limit is set, master waits for slave to reconnect (e.g. dmtcp restarting)
|
||||
// if reconnect_wait_limit is set, master waits for slave to reconnect
|
||||
if (reconnect_wait_limit > 0.0) {
|
||||
message_publish(MSG_WARNING, "Master will wait %f seconds for slave to reconnect.\n", reconnect_wait_limit) ;
|
||||
// make reads (shared mem connection) return quickly so we don't overrun waiting for reconnect
|
||||
// TODO: for socket connection we will overrun in the accept call (see restart_dmtcp_slave)
|
||||
connection->set_sync_wait_limit(exec_get_freeze_frame());
|
||||
if (chkpnt_binary) {
|
||||
restart_dmtcp_slave(); // restart the slave dmtcp executable
|
||||
}
|
||||
}
|
||||
else {
|
||||
message_publish(MSG_WARNING, "reconnect_wait_limit: 0.0 - Master will stop communicating with slave.\n") ;
|
||||
@ -251,13 +242,6 @@ int Trick::SlaveInfo::read_slave_status() {
|
||||
return(0) ;
|
||||
}
|
||||
break ;
|
||||
case (MS_ChkpntLoadBinCmd):
|
||||
// slave has received our load command and is now sending us his dmtcp port and checkpoint file name
|
||||
dmtcp_port = connection->read_port() ;
|
||||
connection->read_name(chkpnt_name, sizeof(chkpnt_name)); // dir/filename
|
||||
message_publish(MSG_WARNING , "Master received DMTCP Port and Checkpoint Filename from slave.\n");
|
||||
connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ; // send this as an ack so slove can shut down
|
||||
break ;
|
||||
case (MS_FreezeCmd):
|
||||
/** @li if the current slave is freezing, freeze the master too */
|
||||
message_publish(MSG_INFO, "Slave is freezing.\n") ;
|
||||
@ -288,12 +272,6 @@ int Trick::SlaveInfo::write_master_status() {
|
||||
/** @li write the current exec_command according to the master to the slave */
|
||||
connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ;
|
||||
}
|
||||
if ((MS_SIM_COMMAND)exec_get_exec_command() == MS_ChkpntLoadBinCmd) {
|
||||
// dmtcp slave will exit, so stop writing status to slave until it reconnects
|
||||
// reconnect_count prevents us from writing status to slave, & is incremented every freeze cycle until we have reconnected
|
||||
reconnect_count = 1;
|
||||
}
|
||||
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
@ -460,22 +438,10 @@ int Trick::Master::checkpoint() {
|
||||
std::string full_path_name = checkpoint_get_output_file();
|
||||
for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
|
||||
if (slaves[ii]->chkpnt_dump_auto) {
|
||||
if (slaves[ii]->chkpnt_binary) {
|
||||
if (slaves[ii]->slave_type == "dmtcp") {
|
||||
exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpBinCmd) ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
|
||||
exec_set_exec_command(save_command) ;
|
||||
} else {
|
||||
message_publish(MSG_ERROR, "Slave is not running under dmtcp control so it cannot dump binary checkpoint.\n") ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
}
|
||||
} else { // ascii
|
||||
exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpAsciiCmd) ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
|
||||
exec_set_exec_command(save_command) ;
|
||||
}
|
||||
exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpAsciiCmd) ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
|
||||
exec_set_exec_command(save_command) ;
|
||||
} else { // no auto dump
|
||||
slaves[ii]->write_master_status() ;
|
||||
}
|
||||
@ -496,22 +462,10 @@ int Trick::Master::preload_checkpoint() {
|
||||
std::string full_path_name = checkpoint_get_load_file();
|
||||
for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
|
||||
if (slaves[ii]->chkpnt_load_auto) {
|
||||
if (slaves[ii]->chkpnt_binary) {
|
||||
if (slaves[ii]->slave_type == "dmtcp") {
|
||||
exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadBinCmd) ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
|
||||
exec_set_exec_command(save_command) ;
|
||||
} else {
|
||||
message_publish(MSG_ERROR, "Slave is not running under dmtcp control so it cannot load binary checkpoint.\n") ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
}
|
||||
} else { // ascii
|
||||
exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadAsciiCmd) ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
|
||||
exec_set_exec_command(save_command) ;
|
||||
}
|
||||
exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadAsciiCmd) ;
|
||||
slaves[ii]->write_master_status() ;
|
||||
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
|
||||
exec_set_exec_command(save_command) ;
|
||||
} else { // no auto load
|
||||
slaves[ii]->write_master_status() ;
|
||||
}
|
||||
@ -549,92 +503,6 @@ int Trick::Master::shutdown() {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::SlaveInfo::restart_dmtcp_slave() {
|
||||
#ifdef _DMTCP
|
||||
FILE *fp;
|
||||
char *dmtcp_path, line[256];
|
||||
std::string config_file;
|
||||
std::string dmtcp_command;
|
||||
std::stringstream dmtcp_port_str;
|
||||
pid_t pid, dmtcp_pid;
|
||||
|
||||
/** @par Detailed Design: */
|
||||
if ( enabled ) {
|
||||
if (slave_type != "dmtcp") {
|
||||
message_publish(MSG_ERROR, "Cannot auto-start slave because it was not running under dmtcp control.\n") ;
|
||||
return(0);
|
||||
}
|
||||
/** @li If chkpnt_load_auto is specified, restart the slave by executing the user-supplied chkpnt_name... */
|
||||
if (chkpnt_load_auto) {
|
||||
if (chkpnt_name[0] == MS_ERROR_NAME) {
|
||||
message_publish(MSG_WARNING, "Cannot auto-start slave because master did not receive chkpnt_name from slave.\n");
|
||||
} else {
|
||||
/** @li First kill slave's dmtcp_coordinator because sometimes it does not quit like it's supposed to. */
|
||||
if (dmtcp_port > 0) { // slave sends 0 if it can't get the port num from the environment
|
||||
/** @li Get dmtcp path from trick's configure output file (dmtcp is only supported in linux). */
|
||||
config_file = std::string(getenv("TRICK_HOME")) + "/config_Linux.mk";
|
||||
if ((fp = fopen(config_file.c_str() , "r")) != NULL ) {
|
||||
while (fgets(line, sizeof(line), fp) != NULL) {
|
||||
if (strncmp(line, "DMTCP", 5)==0) {
|
||||
dmtcp_path = strchr(line, '/');
|
||||
dmtcp_path[strlen(dmtcp_path)-1] = '\0'; // remove newline character
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/** @li Issue a dmtcp_command to kill the dmtcp_coordinator. */
|
||||
fprintf(stderr, "Master attempting to kill slave's dmtcp_coordinator port= %ld"
|
||||
" (it may not exist, that's ok)\n", dmtcp_port);
|
||||
//dmtcp_command.str(""); // reset our command string
|
||||
dmtcp_command = dmtcp_path + std::string("/bin/dmtcp_command");
|
||||
if (access(dmtcp_command.c_str(), F_OK) != 0) {
|
||||
fprintf(stderr, "\nCould not find %s in order to kill the dmtcp_coordinator.\n",
|
||||
dmtcp_command.c_str());
|
||||
} else {
|
||||
//dmtcp_command << " --quiet -p " << dmtcp_port << " q";
|
||||
message_publish(MSG_WARNING, "Restarting DMTCP coordinator\n");
|
||||
if((dmtcp_pid = fork()) == 0) {
|
||||
setsid();
|
||||
dmtcp_port_str << dmtcp_port;
|
||||
int execReturn = execl(dmtcp_command.c_str(), "dmtcp_command", "--quiet", "-p", dmtcp_port_str.str().c_str(), "q", NULL);
|
||||
_Exit(0);
|
||||
} else {
|
||||
int f_status = 0;
|
||||
if(dmtcp_pid > 0) {
|
||||
waitpid(dmtcp_pid, &f_status, 0);
|
||||
} else {
|
||||
message_publish(MSG_ERROR, "Unable to send DMTCP restart command\n");
|
||||
}
|
||||
}
|
||||
//system(dmtcp_command.str().c_str());
|
||||
}
|
||||
} // end if dmtcp_port > 0
|
||||
/** @li Finally invoke the slave's dmtcp checkpoint script. */
|
||||
message_publish(MSG_WARNING, "Auto-starting slave: %s.\n", chkpnt_name);
|
||||
if ((pid = fork()) == 0) {
|
||||
setsid();
|
||||
std::istringstream sChkpnt(chkpnt_name);
|
||||
std::string fileName;
|
||||
while (std::getline(sChkpnt, fileName, '/'));
|
||||
//fprintf(stderr, "------> Starting: %s\n", fileName.c_str());
|
||||
int execReturn = execl(chkpnt_name, fileName.c_str(), NULL);
|
||||
_Exit(0);
|
||||
}
|
||||
}
|
||||
} // end chkpnt_auto
|
||||
/** @li If our connection is a socket, disconnect the socket and call accept again */
|
||||
if (dynamic_cast<MSSocket*>(connection)) {
|
||||
connection->disconnect();
|
||||
//TODO: this will block until slave restarts, possibly causing overruns in freeze mode
|
||||
connection->accept();
|
||||
}
|
||||
reconnect_count = 0; // start writing status to slave again
|
||||
}
|
||||
#endif
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @relates Trick::Master
|
||||
* C binded function to toggle the master/slave synchronization flag to on.
|
||||
|
@ -39,16 +39,6 @@ int Trick::Slave::process_sim_args() {
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::Slave::dmtcp_restart() {
|
||||
if ( enabled ) {
|
||||
reconnected = true ;
|
||||
connection->disconnect();
|
||||
connection->connect();
|
||||
}
|
||||
|
||||
return(0) ;
|
||||
}
|
||||
|
||||
int Trick::Slave::init() {
|
||||
|
||||
std::string rts_disable_name ;
|
||||
@ -132,17 +122,6 @@ std::string Trick::Slave::get_checkpoint_name(MS_SIM_COMMAND command) {
|
||||
file_name_stream << "chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
|
||||
}
|
||||
}
|
||||
#ifdef _DMTCP
|
||||
// dmtcp_checkpoint() only wants a filename -- no dir path
|
||||
if (command == MS_ChkpntDumpBinCmd) {
|
||||
if (chkpnt_name[0] != MS_ERROR_NAME) {
|
||||
file_name_stream << "dmtcp_" << std::string(strrchr(chkpnt_name, '/')+1); // dmtcp_ + filename
|
||||
//std::cout << "----> Slave: parsed checkpoint file name: " << file_name_stream.str() << std::endl;
|
||||
} else {
|
||||
file_name_stream << ""; // dmtcp will create default name
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// ascii load_checkpoint() wants the dir/filename path
|
||||
if (command == MS_ChkpntLoadAsciiCmd) {
|
||||
dir = command_line_args_get_output_dir(); // run dir
|
||||
@ -152,22 +131,6 @@ std::string Trick::Slave::get_checkpoint_name(MS_SIM_COMMAND command) {
|
||||
file_name_stream << dir << "/chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
|
||||
}
|
||||
}
|
||||
#ifdef _DMTCP
|
||||
// dmtcp load wants full dir/filename path that will be sent to the master
|
||||
if (command == MS_ChkpntLoadBinCmd) {
|
||||
dir = getenv("DMTCP_CHECKPOINT_DIR"); // env variable set by dmtcp
|
||||
if (chkpnt_name[0] != MS_ERROR_NAME) {
|
||||
file_name_stream << dir << "/dmtcp_" << std::string(strrchr(chkpnt_name, '/')+1); // dmtcp dir / filename
|
||||
} else { // create default name
|
||||
file_name_stream << dir << "/dmtcp_chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
|
||||
}
|
||||
if (file_name_stream.str().length() > sizeof(chkpnt_name)-1) {
|
||||
message_publish(MSG_ERROR, "Slave could not send checkpoint name to master because name too long (max = %d).\n",
|
||||
sizeof(chkpnt_name)) ;
|
||||
file_name_stream << MS_ERROR_NAME; // send error character
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return(file_name_stream.str()) ;
|
||||
}
|
||||
@ -178,10 +141,6 @@ int Trick::Slave::end_of_frame() {
|
||||
MS_SIM_COMMAND command ;
|
||||
MS_SIM_COMMAND slave_command ;
|
||||
std::string chkpt_name_str;
|
||||
#ifdef _DMTCP
|
||||
int dmtcp_port;
|
||||
std::string dmtcp_port_str;
|
||||
#endif
|
||||
|
||||
/** @par Detailed Design */
|
||||
if ( (enabled) and (activated) ){
|
||||
@ -252,13 +211,6 @@ int Trick::Slave::end_of_frame() {
|
||||
chkpt_name_str = get_checkpoint_name(MS_ChkpntDumpAsciiCmd);
|
||||
checkpoint(chkpt_name_str.c_str());
|
||||
break;
|
||||
case (MS_ChkpntDumpBinCmd): // Master tells slave to dump a binary checkpoint
|
||||
message_publish(MSG_WARNING , "Slave received Checkpoint Dump Binary command from master.\n") ;
|
||||
#ifdef _DMTCP
|
||||
chkpt_name_str = get_checkpoint_name(MS_ChkpntDumpBinCmd);
|
||||
dmtcp_checkpoint(chkpt_name_str.c_str());
|
||||
#endif
|
||||
break;
|
||||
/** @li if reading the master mode command returned a checkpoint load command, load a checkpoint */
|
||||
case (MS_ChkpntLoadAsciiCmd): // Master tells slave to load an ascii checkpoint
|
||||
message_publish(MSG_WARNING , "Slave received Checkpoint Load command from master.\n") ;
|
||||
@ -266,27 +218,6 @@ int Trick::Slave::end_of_frame() {
|
||||
load_checkpoint(chkpt_name_str.c_str()); // load done in freeze or end_of_frame job
|
||||
//load_checkpoint_job(); // do the load NOW
|
||||
break;
|
||||
case (MS_ChkpntLoadBinCmd): // Master tells slave to load a binary checkpoint
|
||||
message_publish(MSG_WARNING , "Slave received Checkpoint Load Binary command from master.\n") ;
|
||||
#ifdef _DMTCP
|
||||
chkpt_name_str = get_checkpoint_name(MS_ChkpntLoadBinCmd);
|
||||
strcpy(chkpnt_name, chkpt_name_str.c_str());
|
||||
// write the dmtcp_coordinator port to the master so it can kill the coordinator when restarting slave
|
||||
dmtcp_port = 0;
|
||||
dmtcp_port_str= getenv("DMTCP_PORT"); // env variable set by dmtcp
|
||||
if (dmtcp_port_str.length() > 0) {
|
||||
sscanf(dmtcp_port_str.c_str(), "%d", &dmtcp_port);
|
||||
}
|
||||
// this tells master we are sending port number and file name next
|
||||
connection->write_command(MS_ChkpntLoadBinCmd) ;
|
||||
connection->write_port(dmtcp_port) ;
|
||||
connection->write_name(chkpnt_name, sizeof(chkpnt_name)) ;
|
||||
// this is just an ack so we know master received port before we kill socket by shutting down
|
||||
command = connection->read_command() ;
|
||||
fprintf(stderr, "SLAVE GOT ACK %d\n", command);
|
||||
exec_terminate_with_return(0, __FILE__, __LINE__, "YOU MUST NOW RUN SLAVE'S DMTCP CHECKPOINT FILE!.");
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
/** @li if reading the master mode command returned an Executive mode, set the slave mode command to the master mode command */
|
||||
exec_set_exec_command((SIM_COMMAND)command) ;
|
||||
|
@ -72,7 +72,6 @@ void * Trick::MessageTCDeviceListenThread::thread_body() {
|
||||
new_connection->disable_handshaking = TC_COMM_TRUE ;
|
||||
new_connection->blockio_limit = 0.0 ;
|
||||
new_connection->blockio_type = TC_COMM_BLOCKIO ;
|
||||
new_connection->dmtcp_use_real = 1 ;
|
||||
new_connection->client_id = 0 ;
|
||||
strcpy(new_connection->client_tag, "") ;
|
||||
new_connection->error_handler = (TrickErrorHndlr *) calloc(1, (int)sizeof(TrickErrorHndlr));
|
||||
|
@ -43,7 +43,6 @@ Trick::VariableServerThread::VariableServerThread(TCDevice * in_listen_dev) :
|
||||
connection.blockio_limit = 0.0 ;
|
||||
connection.blockio_type = TC_COMM_BLOCKIO ;
|
||||
connection.client_id = 0 ;
|
||||
connection.dmtcp_use_real = 1 ;
|
||||
strcpy(connection.client_tag, "") ;
|
||||
connection.error_handler = (TrickErrorHndlr *) calloc(1, (int)sizeof(TrickErrorHndlr));
|
||||
connection.error_handler->report_level = TRICK_ERROR_CAUTION;
|
||||
|
@ -47,10 +47,6 @@
|
||||
#ifdef HDF5
|
||||
#include "trick/DRHDF5.hh"
|
||||
#endif
|
||||
#ifdef DMTCP
|
||||
#include "trick/DMTCP.hh"
|
||||
#include "trick/dmtcp_checkpoint_c_intf.hh"
|
||||
#endif
|
||||
#include "trick/DataRecordDispatcher.hh"
|
||||
#include "trick/data_record_proto.h"
|
||||
#include "trick/DebugPause.hh"
|
||||
|
@ -13,12 +13,6 @@
|
||||
#include "trick/tc_proto.h"
|
||||
#include "trick/trick_byteswap.h"
|
||||
|
||||
#if _DMTCP
|
||||
#include <dlfcn.h>
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
|
||||
int tc_accept_(TCDevice * listen_device, TCDevice * device, const char *file, int line)
|
||||
{
|
||||
socklen_t length;
|
||||
@ -35,27 +29,7 @@ int tc_accept_(TCDevice * listen_device, TCDevice * device, const char *file, in
|
||||
memset(&s_in, 0, sizeof(struct sockaddr_in)) ;
|
||||
/* Accept On Listen Device */
|
||||
length = sizeof(s_in);
|
||||
#if _DMTCP
|
||||
if( dmtcpIsEnabled() && device->dmtcp_use_real ) {
|
||||
const char real_accept_name[] = "_real_accept" ;
|
||||
void* dlhandle ;
|
||||
int (*real_accept_ptr)( int, struct sockaddr *,socklen_t *) = NULL ;
|
||||
|
||||
dlhandle = dlopen( NULL, RTLD_LAZY) ;
|
||||
real_accept_ptr = (int (*)(int, struct sockaddr *,socklen_t *))dlsym( dlhandle , real_accept_name) ;
|
||||
if ( real_accept_ptr != NULL ) {
|
||||
printf("calling DMTCP _real_accept %s:%d\n", file , line) ;
|
||||
the_socket = (*real_accept_ptr)(listen_device->socket, (struct sockaddr *) &s_in, &length) ;
|
||||
} else {
|
||||
the_socket = accept(listen_device->socket, (struct sockaddr *) &s_in, &length);
|
||||
}
|
||||
dlclose(dlhandle) ;
|
||||
} else {
|
||||
the_socket = accept(listen_device->socket, (struct sockaddr *) &s_in, &length);
|
||||
}
|
||||
#else
|
||||
the_socket = accept(listen_device->socket, (struct sockaddr *) &s_in, &length);
|
||||
#endif
|
||||
|
||||
sprintf(client_str, "(ID = %d tag = %s)", listen_device->client_id, listen_device->client_tag);
|
||||
|
||||
|
@ -16,12 +16,6 @@
|
||||
#include "trick/tc_proto.h"
|
||||
#include "trick/trick_byteswap.h"
|
||||
|
||||
#if _DMTCP
|
||||
#include <dlfcn.h>
|
||||
#include "dmtcpaware.h"
|
||||
#endif
|
||||
|
||||
|
||||
int tc_connect_(TCDevice * device, const char *file, int line)
|
||||
{
|
||||
struct sockaddr_in sockin;
|
||||
@ -120,27 +114,7 @@ int tc_connect_(TCDevice * device, const char *file, int line)
|
||||
/*
|
||||
* Establish the connection to the selected server
|
||||
*/
|
||||
#if _DMTCP
|
||||
if( dmtcpIsEnabled() && device->dmtcp_use_real ) {
|
||||
const char real_connect_name[] = "_real_connect" ;
|
||||
void* dlhandle ;
|
||||
int (*real_connect_ptr)( int, struct sockaddr *,socklen_t) = NULL ;
|
||||
|
||||
dlhandle = dlopen( NULL, RTLD_LAZY) ;
|
||||
real_connect_ptr = (int (*)(int, struct sockaddr *,socklen_t))dlsym( dlhandle , real_connect_name) ;
|
||||
if ( real_connect_ptr != NULL ) {
|
||||
printf("calling DMTCP _real_connect %s:%d\n", file , line) ;
|
||||
ret = (*real_connect_ptr)(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin)) ;
|
||||
} else {
|
||||
ret = connect(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin));
|
||||
}
|
||||
dlclose(dlhandle) ;
|
||||
} else {
|
||||
ret = connect(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin));
|
||||
}
|
||||
#else
|
||||
ret = connect(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin));
|
||||
#endif
|
||||
|
||||
if ( ret < 0) {
|
||||
trick_error_report(device->error_handler,TRICK_ERROR_ALERT, file, line, "%s: could not connect to host: %s\n", client_str, strerror(errno));
|
||||
|
Loading…
Reference in New Issue
Block a user