Initial excision of DMTCP from Trick. #28

This commit is contained in:
Penn, John M 047828115 2021-08-27 19:38:56 -05:00
parent 94f20d426a
commit e487e5609f
58 changed files with 4637 additions and 4102 deletions

1
.gitattributes vendored
View File

@ -14,5 +14,4 @@ tutorial.doc export-ignore
users_guide.html export-ignore
MonteCarlo_Presentation.pptx export-ignore
Trick_07_to_10.pptx export-ignore
Trick_Checkpointing_DMTCP.pptx export-ignore
trick_source/data_products/DPX/test export-ignore

View File

@ -141,7 +141,6 @@ set( IO_SRC
${CMAKE_BINARY_DIR}/temp_src/io_src/io_CheckPointRestart.cpp
${CMAKE_BINARY_DIR}/temp_src/io_src/io_Clock.cpp
${CMAKE_BINARY_DIR}/temp_src/io_src/io_CommandLineArguments.cpp
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DMTCP.cpp
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DRAscii.cpp
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DRBinary.cpp
${CMAKE_BINARY_DIR}/temp_src/io_src/io_DRHDF5.cpp

View File

@ -29,7 +29,6 @@ SIM_SERV_DIRS = \
${TRICK_HOME}/trick_source/sim_services/CommandLineArguments \
${TRICK_HOME}/trick_source/sim_services/DataRecord \
${TRICK_HOME}/trick_source/sim_services/DebugPause \
${TRICK_HOME}/trick_source/sim_services/DMTCP \
${TRICK_HOME}/trick_source/sim_services/EchoJobs \
${TRICK_HOME}/trick_source/sim_services/Environment \
${TRICK_HOME}/trick_source/sim_services/EventManager \

View File

@ -49,50 +49,6 @@ AC_DEFUN([AX_HDF5_HOME],[
AC_SUBST([HDF5_HOME])
])
dnl AX_DMTCP_HOME: handle the optional --with-dmtcp=DIR configure argument.
dnl   - Option absent: DMTCP_HOME is set to the empty string.
dnl   - Option given without DIR (withval is "yes"): look for dmtcpaware.h with
dnl     AC_CHECK_HEADER and use /usr as DMTCP_HOME; abort configure if not found.
dnl   - Option given with DIR: require DIR/include/dmtcpaware.h; abort otherwise.
dnl DMTCP_HOME is then made available to generated files through AC_SUBST.
AC_DEFUN([AX_DMTCP_HOME],[
AC_ARG_WITH([dmtcp],
AS_HELP_STRING([--with-dmtcp@<:@=DIR@:>@], [DMTCP root directory]),
[DMTCP_HOME="$withval"
AS_IF([test "$DMTCP_HOME" = "yes"],
AC_CHECK_HEADER(dmtcpaware.h,[DMTCP_HOME="/usr"],AC_MSG_ERROR([could not find dmtcpaware.h])),
AC_CHECK_FILE([$DMTCP_HOME/include/dmtcpaware.h],
[],
AC_MSG_ERROR([could not find $DMTCP_HOME/include/dmtcpaware.h])
)
)],
[DMTCP_HOME=""]
)
AC_SUBST([DMTCP_HOME])
])
dnl AX_TPRO_HOME: handle the optional --with-tpro=DIR configure argument.
dnl When the option is given, TPRO_HOME takes its value and configure aborts
dnl unless TPRO_HOME/linux/h/tpro.h exists. When absent, TPRO_HOME is empty.
dnl NOTE(review): there is no special case for a bare --with-tpro, so the value
dnl supplied by autoconf in that case is used as the directory - confirm intended.
dnl TPRO_HOME is then made available to generated files through AC_SUBST.
AC_DEFUN([AX_TPRO_HOME],[
AC_ARG_WITH([tpro],
AS_HELP_STRING([--with-tpro@<:@=DIR@:>@], [TPRO root directory]),
[TPRO_HOME="$withval"
AC_CHECK_FILE([$TPRO_HOME/linux/h/tpro.h],
[],
AC_MSG_ERROR([could not find $TPRO_HOME/linux/h/tpro.h])
)
],
[TPRO_HOME=""]
)
AC_SUBST([TPRO_HOME])
])
dnl AX_BC635_HOME: handle the optional --with-bc635=DIR configure argument.
dnl When the option is given, BC635_HOME takes its value and configure aborts
dnl unless BC635_HOME/sample/bcuser.h exists. When absent, BC635_HOME is empty.
dnl NOTE(review): there is no special case for a bare --with-bc635, so the value
dnl supplied by autoconf in that case is used as the directory - confirm intended.
dnl BC635_HOME is then made available to generated files through AC_SUBST.
AC_DEFUN([AX_BC635_HOME],[
AC_ARG_WITH([bc635],
AS_HELP_STRING([--with-bc635@<:@=DIR@:>@], [BC635 root directory]),
[BC635_HOME="$withval"
AC_CHECK_FILE([$BC635_HOME/sample/bcuser.h],
[],
AC_MSG_ERROR([could not find $BC635_HOME/sample/bcuser.h])
)
],
[BC635_HOME=""]
)
AC_SUBST([BC635_HOME])
])
AC_DEFUN([AX_GSL_HOME],[
AC_ARG_WITH([gsl],
AS_HELP_STRING([--with-gsl@<:@=DIR@:>@], [GSL root directory]),
@ -571,13 +527,12 @@ AC_SUBST([UDUNITS_EXCLUDE])
dnl process the other optional command line arguments
AX_HDF5_HOME([])
AX_DMTCP_HOME([])
AX_TPRO_HOME([])
AX_BC635_HOME([])
AX_GSL_HOME([])
AX_GTEST_HOME([])
AX_ER7_UTILS([])
AC_OUTPUT([share/trick/makefiles/config_user.mk])
AC_CONFIG_FILES([share/trick/makefiles/config_user.mk])
AC_OUTPUT
printf "\033@<:@32mconfigure script successfully completed\033@<:@0m\n"

7260
configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -110,35 +110,6 @@ new_slave.chkpnt_load_auto = 0
in which case your Slave would have to have its own model code to perform a checkpoint dump/load.
If the Slave simulation was built with DMTCP enabled (for more information see TBD DMTCP Section), you can set the following variable
in the Master input file, causing the Slave to dump/load a binary DMTCP checkpoint when commanded by the Master:
```
new_slave.chkpnt_binary = 1
```
Note that when loading a binary DMTCP checkpoint, the checkpoint is a new executable file that must be run. When the
Master commands a checkpoint (and chkpnt_load_auto=1), the Slave will send the Master its checkpoint file name path
and then terminate itself. The Master will then automatically restart the Slave by executing that checkpoint file.
If chkpnt_load_auto=0, the user is responsible for terminating and restarting the Slave.
To run the Slave with DMTCP enabled, you'll have to modify some of the Slave's attributes in the Master input file.
- S_main_name normally defaults to your Slave's executable, which Trick fills in for you. But you will set it to instead run the dmtcp_checkpoint command.
- run_input_file should not change from what your Slave would normally run, but it needs the keyword "dmtcp" specified after it.
- sync_error_terminate needs to be 0 because the Slave must be terminated before the Master restarts it, and we don't want the Master to terminate as well.
- reconnect_wait_limit is the time the Master will then wait for the Slave to be restarted and get reconnected with the Master. If the Slave does not
reconnect within this time, the Master will deactivate the Slave for the rest of the simulation.
Here's an example:
```
new_slave.S_main_name = "/users/bob/dmtcp-1.2.7/bin/dmtcp_checkpoint --quiet --new-coordinator --checkpoint-open-files --ckptdir ./dmtcp_checkpoints ./S_main_Linux_4.4_x86_64.exe"
new_slave.run_input_file = "RUN_test/slave.py dmtcp"
new_slave.sync_error_terminate = 0
new_slave.reconnect_wait_limit = 10.0
```
When chkpnt_load_auto=1, the Slave should restart and reconnect within a second or two. If chkpnt_load_auto=0, the user has
to restart the Slave manually (possibly by typing the checkpoint executable on the command line), so reconnect_wait_limit should be
set long enough to allow for that.

View File

@ -57,21 +57,12 @@ namespace Trick {
/** Times to dump a checkpoint. Saved as simulation tics.\n */
std::priority_queue< long long, std::vector< long long >, std::greater< long long > > checkpoint_times ; /**< trick_units(--) */
/** Times to dump a dmtcp_checkpoint. Saved as simulation tics.\n */
std::priority_queue< long long, std::vector< long long >, std::greater< long long > > dmtcp_checkpoint_times ; /**< trick_units(--) */
/** Period to dump a recurring checkpoint. Saved as simulation tics.\n */
long long safestore_period ; /**< trick_units(--) */
/** Period to dump a recurring dmtcp checkpoint. Saved as simulation tics.\n */
long long dmtcp_safestore_period ; /**< trick_units(--) */
/** Next time to dump a recurring checkpoint. Saved as simulation tics.\n */
long long safestore_time ; /**< trick_units(--) */
/** Next time to dump a recurring dmtcp checkpoint. Saved as simulation tics.\n */
long long dmtcp_safestore_time ; /**< trick_units(--) */
/** If true take a pre_init_checkpoint\n */
bool pre_init_checkpoint ; /**< trick_units(--) */
@ -84,9 +75,6 @@ namespace Trick {
/** If true enable taking safestore checkpoints\n */
bool safestore_enabled ; /**< trick_units(--) */
/** If true enable taking dmtcp safestore checkpoints\n */
bool dmtcp_safestore_enabled ; /**< trick_units(--) */
/** output_directory/checkpoint_file_name to dump for a checkpoint\n */
std::string output_file ; /**< ** */
@ -147,15 +135,6 @@ namespace Trick {
int set_safestore_enabled(bool yes_no) ;
/**
@brief @userdesc Command to set the dmtcp_safestore_enabled flag. If dmtcp_safestore_enabled is set
periodic checkpoints will be done according to dmtcp_safestore_period that was set in dmtcp_checkpoint_safestore().
The checkpointed file name is @e dmtcp_chkpnt_safestore.
@par Python Usage:
@code trick.dmtcp_checkpoint_safestore_set_enabled(<yes_no>) @endcode
@param yes_no - boolean yes (C integer 1) = dump periodic checkpoint, no (C integer 0) = do not dump
@return always 0
*/
int dmtcp_set_safestore_enabled(bool yes_no) ;
/**
@brief @userdesc Command to get the name of the checkpoint dump file.
@ -200,16 +179,6 @@ namespace Trick {
*/
int set_safestore_time(double in_time) ;
/**
@brief @userdesc Command to set the desired period that dmtcp safestore checkpoints will be dumped. (Sets dmtcp_safestore_period to the integral time tic value corresponding
to the incoming in_time so that checkpoint occurs periodically.)
@par Python Usage:
@code trick.dmtcp_checkpoint_safestore(<in_time>) @endcode
@param in_time - desired dmtcp safestore checkpoint time period in seconds.
@return always 0
*/
int dmtcp_set_safestore_time(double in_time) ;
/**
@brief @userdesc Command to dump a checkpoint now to the specified file.
Calls the MemoryManager checkpoint method with the string argument file_name
@ -235,34 +204,6 @@ namespace Trick {
*/
virtual int checkpoint(double in_time) ;
/**
@brief @userdesc Command to dump a dmtcp checkpoint now to the specified file.
NOTE(review): the previous text of this comment was copied from the ascii checkpoint()
overload and documented print_status and obj_list_str parameters; this overload only
takes file_name. How the dump is carried out is not visible from this declaration.
@par Python Usage:
@code trick.dmtcp_checkpoint("<file_name>") @endcode
@param file_name - optional: name of checkpoint file to dump (default is "dmtcp_chkpnt_<time>")
@return always 0
*/
virtual int dmtcp_checkpoint(std::string file_name = "") ;
/**
@brief @userdesc Command to dump a dmtcp checkpoint at in_time. (Converts the incoming in_time to the
integral time tic value so that the checkpoint occurs once at that time.)
NOTE(review): presumably the time is stored in dmtcp_checkpoint_times; the earlier wording said the dump
happens "at the end of the execution frame" - confirm against the job that services the queue.
The checkpointed file name is @e dmtcp_chkpnt_<in_time>.
@par Python Usage:
@code trick.dmtcp_checkpoint(<in_time>) @endcode
@param in_time - desired checkpoint time in seconds.
@return always 0
*/
virtual int dmtcp_checkpoint(double in_time) ;
/* helper function to generate DMTCP restart script file name */
void setDMTCPFilename( std::string file_name = "");
/**
* Executes the pre_init_checkpoint
* @return always 0
@ -289,14 +230,6 @@ namespace Trick {
*/
virtual int write_checkpoint() ;
/**
* Job that writes a pending dmtcp checkpoint. Takes no arguments.
* NOTE(review): the earlier comment was copied from write_checkpoint() (file name
* "checkpoint_<time>", a sim_time_tics parameter that does not exist here). Presumably
* this variant builds a "dmtcp_chkpnt_<time>" name and calls dmtcp_checkpoint(string) -
* confirm against the implementation.
* @return always 0
*/
virtual int write_dmtcp_checkpoint() ;
/**
* Creates a file name based on the simulation time, "checkpoint_<time>" and
* calls checkpoint(string) routine with the filename

View File

@ -46,15 +46,6 @@ const char * checkpoint_get_load_file() ;
/* checkpoint call accessible from C code */
int checkpoint( const char * file_name );
/* set dmtcp safestore_enabled flag */
int dmtcp_checkpoint_safestore(int yes_no) ;
/* dmtcp safestore checkpoint call accessible from C code */
int dmtcp_checkpoint_safestore_period( double in_period ) ;
/* dmtcp checkpoint call accessible from C code */
int dmtcp_checkpoint( const char * file_name );
/* checkpoint for specific sim objects call from C code */
int checkpoint_objects( const char * file_name, const char * objects ) ;

View File

@ -1,68 +0,0 @@
/*
PURPOSE:
(DMTCP)
*/
#ifndef DMTCP_HH
#define DMTCP_HH
#ifdef _DMTCP
#include "dmtcpaware.h"
#endif
#include <queue>
#include "trick/dmtcp_checkpoint_c_intf.hh"
#include "trick/Scheduler.hh"
namespace Trick {
/**
 * Scheduler subclass for DMTCP checkpointing. It owns three job queues
 * (pre_queue, post_queue, restart_queue) for jobs run before a checkpoint dump,
 * after a dump, and after a checkpoint reload.
 * Only declarations are visible here; notes on individual members that go
 * beyond their names are marked as assumptions.
 */
class DMTCP : public Trick::Scheduler {
public:
DMTCP() ;
/* Entry points named after simulation phases. NOTE(review): pre(), post() and
   restart() presumably run pre_queue, post_queue and restart_queue respectively - confirm. */
int init() ;
int freeze() ;
int pre() ;
int post() ;
int restart() ;
int call_dmtcp();
void dmtcpSetCheckpointNow() ;
void dmtcpJobQueue( std::string file_name );
void renameRestartScript();
void dmtcpSystemCmd( const std::string& str );
void dmtcpCleanup();
void dmtcpSafestoreDir();
void dmtcpRenameCmd();
bool isSpecialCharacter( const std::string& str );
std::string getScriptName();
std::string splitFilename( const std::string& str );
unsigned int dmtcp_ckpt_complete;
/* Note: std::priority_queue<std::string> with the default comparator yields the
   lexicographically greatest string first, so entries are not processed in insertion order. */
std::priority_queue<std::string> dmtcp_checkpoint_jobs_queue ; // ** ignore this for checkpointing
/* Overrides of the Scheduler reporting/instrumentation interface (assumed from the base class name - TODO confirm). */
virtual int write_s_job_execution( FILE * fp ) ;
virtual int instrument_job_before(Trick::JobData * instrument_job) ;
virtual int instrument_job_after(Trick::JobData * instrument_job) ;
virtual int instrument_job_remove(std::string in_job) ;
private:
bool checkpoint_now ;
/* Fixed-size 1024-byte buffer; anything written here must fit including the terminating NUL. */
char prev_checkpoint_dirname[1024] ;
/** queue to hold jobs to be called before a checkpoint is dumped. */
Trick::ScheduledJobQueue pre_queue ; /* ** */
/** queue to hold jobs to be called after a checkpoint is dumped. */
Trick::ScheduledJobQueue post_queue ; /* ** */
/** queue to hold jobs to be called after a checkpoint is reloaded. */
Trick::ScheduledJobQueue restart_queue ; /* ** */
} ;
}
#endif

View File

@ -85,9 +85,6 @@ namespace Trick {
/** @brief Calls init() to create thread for writing simulation data to disk during restart. */
int restart() ;
/** @brief Calls restart of all data recording groups. */
int dmtcp_restart() ;
/** @brief Stop data recording during simulation shutdown. */
virtual int shutdown() ;

View File

@ -287,12 +287,6 @@ namespace Trick {
*/
virtual int restart() ;
/**
@brief Restart data recording, add data_record job to the executive scheduler.
@returns always 0
*/
virtual int dmtcp_restart() ;
/**
@brief Writes format specific information to the top line of the log header file.
@returns always 0

View File

@ -66,7 +66,7 @@ namespace Trick {
MSQ_DECLARE (master_command, MS_SIM_COMMAND)
MSQ_DECLARE (slave_command, MS_SIM_COMMAND)
// checkpoint data is not sent every frame, so dont need a queue
int slave_port; /**< trick_units(--) slave's dmtcp checkpoint port */
int slave_port; /**< trick_units(--) slave's checkpoint port */
char chkpnt_name[256]; /**< trick_units(--) checkpoint dir/filename */
} MSSharedMemData;
@ -140,7 +140,7 @@ namespace Trick {
virtual MS_SIM_COMMAND read_command() ;
/**
@brief Read a port number (i.e. dmtcp port) from the other simulation.
@brief Read a port number from the other simulation.
@return the port read or MS_ERROR_PORT if the read failed
*/
virtual int read_port() ;
@ -164,7 +164,7 @@ namespace Trick {
virtual int write_command(MS_SIM_COMMAND command) ;
/**
@brief Writes a port number (i.e. dmtcp port) to the other simulation.
@brief Writes a port number to the other simulation.
@return the number of bytes written
*/
virtual int write_port(int port) ;

View File

@ -98,7 +98,7 @@ namespace Trick {
virtual MS_SIM_COMMAND read_command() ;
/**
@brief Read a port number (i.e. dmtcp port) from the other simulation. Calls tc_read.
@brief Read a port number from the other simulation. Calls tc_read.
@return the port read or MS_ERROR_PORT if the read failed
*/
virtual int read_port() ;
@ -122,7 +122,7 @@ namespace Trick {
virtual int write_command(MS_SIM_COMMAND command) ;
/**
@brief Writes a port number (i.e. dmtcp port) to the other simulation. Calls tc_write.
@brief Writes a port number to the other simulation. Calls tc_write.
@return the number of bytes written
*/
virtual int write_port(int port) ;

View File

@ -60,12 +60,9 @@ namespace Trick {
in which case the master will no longer communicate with the slave.\n */
bool activated ; /**< trick_units(--) */
/** Indicates "dmtcp" or "ascii" slave. Used to construct sync_port_tag (default is "undefined").\n*/
/** Indicates "ascii" slave. Used to construct sync_port_tag (default is "undefined").\n*/
std::string slave_type; /**< trick_units(--) */
/** Slave's dmtcp port if slave_type "dmtcp" (=0 if slave_type "ascii").\n*/
long long dmtcp_port; /**< trick_units(--) */
/** @userdesc Which remote shell shall the master use to start the slave.\n
TRICK_SSH means use ssh (the default), TRICK_RSH means use rsh, TRICK_USER_REMOTE_SH means use custom.\n */
Trick::RemoteShell remote_shell; /**< trick_units(--) */
@ -92,16 +89,10 @@ namespace Trick {
/** @userdesc When master dumps a checkpoint, command the slave to dump a checkpoint (default=true).\n */
bool chkpnt_dump_auto; /**< trick_units(--) */
/** @userdesc When master loads a checkpoint, command the slave to load a checkpoint (default=true).\n
If chkpnt_binary is true, the slave will terminate and the master will load the slave's dmtcp checkpoint.\n */
/** @userdesc When master loads a checkpoint, command the slave to load a checkpoint (default=true).\n */
bool chkpnt_load_auto; /**< trick_units(--) */
/** @userdesc When master dumps/loads a checkpoint, this indicates the format of the slave checkpoint.\n
The default = false which means the typical trick ascii checkpoint. True means a dmtcp checkpoint.\n */
bool chkpnt_binary; /**< trick_units(--) */
/** @userdesc Send master's checkpoint file name to slave from here.\n
Also used to read dmtcp checkpoint file name from slave when loading dmtcp checkpoint.\n **/
/** @userdesc Send master's checkpoint file name to slave from here.\n **/
char chkpnt_name[256]; /**< trick_units(--) */
/** @userdesc The "RUN_<dir>/<input_file>" of the slave to use as the parameter to S_main_name.\n */
@ -166,12 +157,6 @@ namespace Trick {
*/
int write_master_chkpnt_name(std::string full_path_name) ;
/**
@brief Restart the slave's DMTCP executable after 1) killing its dmtcp_coordinator, and
2) disconnecting and starting a new master/slave socket connection.
@return always 0
*/
int restart_dmtcp_slave();
} ;
/**

View File

@ -36,7 +36,7 @@ namespace Trick {
/* True when this slave is enabled (default is false), see process_sim_args.\n */
bool enabled ; /**< trick_units(--) */
/* True when this slave has reconnected (dmtcp_restart) */
/* True when this slave has reconnected */
bool reconnected ; /**< trick_units(--) */
/* True when this slave has sent ReconnectCmd to master (default is false).\n */
@ -45,8 +45,7 @@ namespace Trick {
/* True when this slave has published a message that it has reconnected to master(default is false).\n */
bool msg_published; /**< trick_units(--) */
/** @userdesc Read in master's checkpoint file name here, used for forming slave's checkpoint file name.\n
Also used to send slave's dmtcp checkpoint file name to master when loading dmtcp checkpoint.\n **/
/** @userdesc Read in master's checkpoint file name here, used for forming slave's checkpoint file name.\n **/
char chkpnt_name[256]; /**< trick_units(--) */
/** @userdesc True means terminate the slave if it loses synchronization with the master.\n
@ -85,12 +84,6 @@ namespace Trick {
*/
int init() ;
/**
@brief Reconnect master/slave communications for dmtcp restart.
@return always 0
*/
int dmtcp_restart() ;
/**
@brief Read the master's checkpoint name.
@return the appropriate name to use for the current dump/load command

View File

@ -1 +0,0 @@
#include "trick/DMTCP.hh"

View File

@ -1 +0,0 @@
#include "trick/dmtcp_checkpoint_c_intf.hh"

View File

@ -1,38 +0,0 @@
/*
PURPOSE:
(DMTCP C interface)
*/
#ifndef DMTCP_CHECKPOINT_C_INTF_HH
#define DMTCP_CHECKPOINT_C_INTF_HH
/* std::string is used by dmtcp_job_queue() below; include it here so this
   header compiles on its own instead of relying on the includer. */
#include <string>
#ifdef _DMTCP
#include "dmtcpaware.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* call DMTCP function dmtcpIsEnabled() */
int dmtcp_is_enabled() ;
/* call DMTCP function dmtcpRunCommand() */
int call_dmtcp();
/* DMTCP jobs to run pre, post, and restart. Arguments for dmtcpInstallHooks() - defined in dmtcp/src/dmtcpaware.c */
void dmtcp_pre_checkpoint() ;
void dmtcp_post_checkpoint() ;
void dmtcp_restart() ;
/* set DMTCP checkpoint flag */
void dmtcp_set_checkpoint_now() ;
/* queue to store dmtcp job names.
   Takes a std::string by value, so despite the C linkage this declaration is usable from C++ only. */
void dmtcp_job_queue( std::string file_name );
#ifdef __cplusplus
}
#endif
#endif

View File

@ -4,7 +4,6 @@
#include "trick/reference_frame.h"
#include "trick/GetTimeOfDayClock.hh"
#include "trick/CommandLineArguments.hh"
#include "trick/DMTCP.hh"
#include "trick/Executive.hh"
#include "trick/ExecutiveException.hh"
#include "trick/Environment.hh"

View File

@ -28,8 +28,6 @@ typedef enum {
MS_ReconnectCmd = 12, /* Master/Slave: slave reconnected after chkpnt load */
MS_ChkpntDumpAsciiCmd = 13, /* Master/Slave: master tells slave to dump an ascii chkpnt */
MS_ChkpntLoadAsciiCmd = 14, /* Master/Slave: master tells slave to load an ascii chkpnt */
MS_ChkpntDumpBinCmd = 15, /* Master/Slave: master tells slave to dump a binary chkpnt */
MS_ChkpntLoadBinCmd = 16 /* Master/Slave: master tells slave to load a binary chkpnt */
} MS_SIM_COMMAND;

View File

@ -122,8 +122,6 @@ typedef struct {
int socket_type ; /* ** Socket type SOCK_STREAM or SOCK_DGRAM */
TCCommFlag disabled; /* -- Connection enabled flag */
TCCommFlag disable_handshaking; /* -- Set this to skip handshaking in tc_connect and tc_accept. */
int dmtcp_use_real ; /* -- if true, use dlsym to lookup connect/accept calls. Used with DMTCP sockets
if false call connect/accept directly. */
double blockio_limit; /* s Block I/O timeout limit */
TCCommBlocking blockio_type; /* -- E.g. spinloops */
unsigned char byte_info[TC_BYTE_INFO_LENGTH]; /* ** byte swap info */

View File

@ -30,10 +30,6 @@ PLATFORM_LIBS = -lrt
LD_WHOLE_ARCHIVE := -Wl,-whole-archive
LD_NO_WHOLE_ARCHIVE := -Wl,-no-whole-archive
ifneq ($(DMTCP),)
PLATFORM_LIBS += -L$(DMTCP)/dmtcpaware -ldmtcpaware
endif
ifneq ($(TPROCTE),)
TRICK_ADDITIONAL_CXXFLAGS += -D_TPRO_CTE -I$(TPROCTE)/linux/h -I$(TPROCTE)/linux/tsync/h -I$(TPROCTE)/common/h -I$(TPROCTE)/common/tsync/h
PLATFORM_LIBS += $(TPROCTE)/linux/tsync/lib/libtsync.a $(TPROCTE)/linux/lib/libtpro.a

View File

@ -38,9 +38,6 @@ UDUNITS_LDFLAGS = @UDUNITS_LDFLAGS@
TRICK_EXCLUDE += :@UDUNITS_EXCLUDE@
HDF5 = @HDF5_HOME@
DMTCP = @DMTCP_HOME@
TPROCTE = @TPRO_HOME@
BC635 = @BC635_HOME@
GSL_HOME = @GSL_HOME@
GTEST_HOME = @GTEST_HOME@
USE_ER7_UTILS = @USE_ER7_UTILS@

View File

@ -56,9 +56,6 @@ GTEST_HOME = @GTEST_HOME@
PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
X_LIB_DIR = @X_LIB_DIR@
MOTIF_HOME = @MOTIF_HOME@
DMTCP = @DMTCP_HOME@
TPROCTE = @TPRO_HOME@
BC635 = @BC635_HOME@
USE_ER7_UTILS = @USE_ER7_UTILS@

View File

@ -19,7 +19,6 @@ a replacement SimObject will create an uncompilable sim.
#define TRICK_NO_MASTERSLAVE
#define TRICK_NO_INSTRUMENTATION
#define TRICK_NO_INTEGRATE
#define TRICK_NO_DMTCP
#define TRICK_NO_REALTIMEINJECTOR
#define TRICK_NO_ZEROCONF
#define TRICK_NO_UNITTEST
@ -33,7 +32,6 @@ a replacement SimObject will create an uncompilable sim.
##include <cstdarg>
##include "trick/SimObject.hh"
##include "trick/DMTCP.hh"
##include "trick/exec_proto.h"
##include "trick/exec_proto.hh"
##include "trick/Executive.hh"
@ -146,11 +144,6 @@ class SysSimObject : public Trick::SimObject {
//{TRK} P0 ("restart") sched.restart() ;
{TRK} P65534 ("restart") exec_collect_init() ;
#ifndef TRICK_NO_DMTCP
{TRK} ("dmtcp_restart") sched.write_s_job_execution(NULL) ;
{TRK} ("dmtcp_restart") sched.write_s_run_summary(NULL) ;
#endif
{TRK} ("system_moding") sched.sched_freeze_to_exec_command(false) ;
{TRK} ("end_of_frame") sched.sched_freeze_to_exec_command(true) ;
{TRK} ("end_of_frame") sched.async_freeze_to_exec_command() ;
@ -217,7 +210,6 @@ class CheckPointRestartSimObject : public Trick::SimObject {
{TRK} P1 ("initialization") cpr.write_pre_init_checkpoint() ;
{TRK} P65535 ("initialization") cpr.write_post_init_checkpoint() ;
{TRK} P0 ("system_checkpoint") cpr.write_checkpoint() ;
{TRK} P0 ("top_of_frame") cpr.write_dmtcp_checkpoint() ;
{TRK} P0 ("system_checkpoint") cpr.safestore_checkpoint() ;
{TRK} P0 ("shutdown") cpr.write_end_checkpoint() ;
@ -285,9 +277,6 @@ class MessageSimObject : public Trick::SimObject {
{TRK} ("exec_time_tic_changed") mpublisher.init() ;
{TRK} P1 ("restart") mdevice.restart() ;
#ifndef TRICK_NO_DMTCP
{TRK} P1 ("dmtcp_restart") mdevice.restart() ;
#endif
{TRK} ("shutdown") mtcout.shutdown() ;
{TRK} ("shutdown") mdevice.shutdown() ;
@ -463,10 +452,6 @@ class VariableServerSimObject : public Trick::SimObject {
#endif
{TRK} P1 ("initialization") Trick::launch_all_external_applications();
#ifndef TRICK_NO_DMTCP
{TRK} P1 ("dmtcp_restart") vs.restart() ;
{TRK} P1 ("dmtcp_restart") Trick::launch_all_external_applications();
#endif
{TRK} ("preload_checkpoint") vs.suspendPreCheckpointReload();
{TRK} ("restart") vs.restart();
{TRK} ("restart") vs.resumePostCheckpointReload();
@ -535,9 +520,6 @@ class DataRecordDispatcherSimObject : public Trick::SimObject {
{TRK} ("end_of_frame") drd.signal_thread() ;
{TRK} ("preload_checkpoint") drd.preload_checkpoint() ;
{TRK} ("restart") drd.restart() ;
#ifndef TRICK_NO_DMTCP
{TRK} ("dmtcp_restart") drd.dmtcp_restart() ;
#endif
{TRK} ("shutdown") drd.shutdown() ;
}
@ -567,9 +549,6 @@ class RTSyncSimObject : public Trick::SimObject {
{TRK} P65535 ("initialization") rt_sync.get_sim_end_init_time() ;
{TRK} P65535 ("restart") rt_sync.restart(exec_get_time_tics()) ;
#ifndef TRICK_NO_DMTCP
{TRK} P65535 ("dmtcp_restart") rt_sync.restart(exec_get_time_tics()) ;
#endif
{TRK} ("freeze_init") rt_sync.freeze_init(exec_get_freeze_frame()) ;
{TRK} P65535 ("freeze") rt_sync.freeze_pause(exec_get_freeze_frame()) ;
@ -638,10 +617,6 @@ class MasterSlaveSimObject : public Trick::SimObject {
{TRK} P0 ("initialization") slave.init() ;
{TRK} ("checkpoint") master.checkpoint() ;
{TRK} ("preload_checkpoint") master.preload_checkpoint() ;
#ifndef TRICK_NO_DMTCP
{TRK} ("dmtcp_pre") master.checkpoint() ;
{TRK} ("dmtcp_restart") slave.dmtcp_restart() ;
#endif
{TRK} P65534 ("end_of_frame") master.end_of_frame_status_from_slave() ; // must occur BEFORE rt_monitor
{TRK} P65535 ("end_of_frame") master.end_of_frame_status_to_slave() ; // must occur AFTER rt_monitor
@ -727,21 +702,6 @@ class IntegLoopSimObject : public Trick::SimObject {
#endif
#endif
// Default DMTCP sim object; compiled out when TRICK_NO_DMTCP is defined.
#ifndef TRICK_NO_DMTCP
class DMTCPSimObject : public Trick::SimObject {
public:
Trick::DMTCP dmtcp ;
DMTCPSimObject() {
// Register the DMTCP scheduler with the executive.
exec_register_scheduler(&dmtcp) ;
// Schedule dmtcp.init() as an "initialization" job and dmtcp.freeze() as a "freeze" job.
{TRK} ("initialization") dmtcp.init() ;
{TRK} ("freeze") dmtcp.freeze() ;
}
}
// The single instance, named trick_dmtcp.
DMTCPSimObject trick_dmtcp ;
#endif
#ifndef TRICK_NO_REALTIMEINJECTOR
class InjectorExecSimObject : public Trick::SimObject {
public:

View File

@ -34,7 +34,6 @@ if hasattr(top.cvar, 'trick_message'):
# from checkpoint restart
if hasattr(top.cvar, 'trick_cpr'):
checkpoint = top.cvar.trick_cpr.cpr.checkpoint
dmtcp_checkpoint = top.cvar.trick_cpr.cpr.dmtcp_checkpoint
load_checkpoint = top.cvar.trick_cpr.cpr.load_checkpoint
# from the input processor

View File

@ -45,11 +45,6 @@ class SysSimObject : public Trick::SimObject {
//{TRK} P0 ("restart") sched.restart() ;
{TRK} P65534 ("restart") exec_collect_init() ;
#ifndef TRICK_NO_DMTCP
{TRK} ("dmtcp_restart") sched.write_s_job_execution(NULL) ;
{TRK} ("dmtcp_restart") sched.write_s_run_summary(NULL) ;
#endif
{TRK} ("system_moding") sched.sched_freeze_to_exec_command(false) ;
{TRK} ("end_of_frame") sched.sched_freeze_to_exec_command(true) ;
{TRK} ("end_of_frame") sched.async_freeze_to_exec_command() ;

View File

@ -1,29 +0,0 @@
import trick
from trick.sim_objects import *
def main():
    """Configure a real-time run that exercises the dmtcp checkpoint commands.

    Requests one named dmtcp checkpoint, timed dmtcp checkpoints at 0, 3 and
    9 seconds, a dmtcp checkpoint issued through add_read, and periodic dmtcp
    safestore checkpoints with a 15 second period.
    """
    trick.sim_control_panel_set_enabled(True)
    trick.exec_set_freeze_command(True)
    trick.real_time_enable()
    trick.exec_set_software_frame(0.0125)
    trick.frame_log_on()

    # Named checkpoint, then checkpoints at the given simulation times (seconds).
    trick.dmtcp_checkpoint("test_checkpoint")
    trick.dmtcp_checkpoint(0)
    trick.dmtcp_checkpoint(3)
    trick.dmtcp_checkpoint(9)

    # NOTE(review): presumably executes the quoted command at sim time 25 - confirm.
    trick.add_read(25, "trick.dmtcp_checkpoint()")

    # Periodic dmtcp safestore checkpoints with a 15 second period.
    trick.dmtcp_checkpoint_safestore_set_enabled(True)
    trick.dmtcp_checkpoint_safestore(15)

    # my_integ_loop is not defined in this file; it is expected to come from the sim.
    my_integ_loop.getIntegrator(trick.Runge_Kutta_2, 4)


if __name__ == "__main__":
    main()

View File

@ -13,9 +13,6 @@ new_connection = trick.MSSocket()
new_slave = trick.SlaveInfo()
new_slave.set_connection_type(new_connection)
new_slave.sim_path = trick.get_trick_env("TRICK_HOME") + "/trick_sims/SIM_Ball++_L1"
#if trick.dmtcp_is_enabled() == 1:
# new_slave.S_main_name = "dmtcp_checkpoint --checkpoint-open-files ./S_main_${TRICK_HOST_CPU}.exe"
#print "hey " , trick.dmtcp_is_enabled()
new_slave.run_input_file = "RUN_slave/slave.py"
new_slave.sync_error_terminate = 1
trick_master_slave.master.add_slave(new_slave)

View File

@ -171,7 +171,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
private static String host;
private static int port = -1;
private static boolean isRestartOptionOn;
private static boolean isDmtcpOptionOn;
// The object of SimState that has Sim state data.
private SimState simState;
@ -278,20 +277,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
actionController.handleFreeze(debug_flag);
}
/**
 * Action behind the "Dump DMTCP Chkpnt" button. Does nothing unless the
 * DMTCP option is on; otherwise asks the action controller to dump a DMTCP
 * checkpoint, offering the run path and "dmtcp_chkpnt_" plus the formatted
 * exec-out time as defaults, and then updates the displayed status.
 */
@Action
public void dumpDMTCPChkpnt() {
    if (!isDmtcpOptionOn) {
        return;
    }

    final String checkpointDir = simState.getRunPath();
    final String formattedTime = simState.getTwoFractionFormatted(simState.getExecOutTime());
    final String defaultFileName = "dmtcp_chkpnt_" + formattedTime;

    actionController.handleDumpDMTCPChkpnt(checkpointDir, defaultFileName, getMainFrame());
    runtimeStatePanel.setTitle("Dumping DMTCP Checkpoint");
    currentSimStatusDesc = "PreCheckpoint";
}
@Action
public void shutdownSim() {
actionController.handleShutdown();
@ -692,15 +677,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
commandLine = matcher.replaceAll("");
}
// check to see if -dmtcp is used
Pattern dmtcpOptionPattern = Pattern.compile("(\\-dmtcp)(,|$)");
Matcher dmtcp_matcher = dmtcpOptionPattern.matcher(commandLine);
if (dmtcp_matcher.find()) {
isDmtcpOptionOn = true;
commandLine = dmtcp_matcher.replaceAll("");
}
// check to see if -auto_exit is used
Pattern autoExitOptionPattern = Pattern.compile("(\\-auto\\_exit)(,|$)");
Matcher autoExitMatcher = autoExitOptionPattern.matcher(commandLine);
@ -995,13 +971,8 @@ public class SimControlApplication extends TrickApplication implements PropertyC
titledCommandsPanel.setContentContainer(commandsPanel);
GridLayout gridLayout = null;
if (!isDmtcpOptionOn) {
// 2 columns and 5 rows, each component has the same width and height.
gridLayout = new GridLayout(5,2,2,4);
} else {
// 2 columns and 6 rows
gridLayout = new GridLayout(6,2,2,4);
}
commandsPanel.setLayout(gridLayout);
@ -1052,12 +1023,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
commandsPanel.add(liteButton);
if (isDmtcpOptionOn) {
commandsPanel.add(new JButton(getAction("dumpDMTCPChkpnt")));
dumpChkpntASCIIButton.setText("Dump ASCII Chkpnt");
loadChkpntButton.setText("Load ASCII Chkpnt");
}
commandsPanel.add(new JButton(getAction("quit")));
return titledCommandsPanel;
@ -1231,18 +1196,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
return statusBar;
}
/**
 * Enables the "dumpDMTCPChkpnt" action when the DMTCP option is on and
 * disables it otherwise.
 */
public void isDmtcpRunning() {
    // The action's enabled state simply mirrors the option flag.
    setActionsEnabled("dumpDMTCPChkpnt", isDmtcpOptionOn);
}
/**
* Updates the GUI as needed if SIM states are changed.
*/
@ -1293,7 +1246,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
} else {
enableAllCommands();
setActionsEnabled("freezeSim,quit", false);
isDmtcpRunning(); /* if DMTCP is not running, disable button on control panel */
}
logoImagePanel.pause();
break;
@ -1303,7 +1255,6 @@ public class SimControlApplication extends TrickApplication implements PropertyC
disableAllCommands();
setActionsEnabled("freezeSim,lite", true);
if (debug_flag != 0) {
setActionsEnabled("stepSim,dumpDMTCPChkpnt", true);
setActionsEnabled("stepSim,dumpChkpntASCII", true);
}
logoImagePanel.resume();
@ -1349,7 +1300,7 @@ public class SimControlApplication extends TrickApplication implements PropertyC
ArrayList<String> actions = new ArrayList<String>();
actions.add("stepSim,recordingSim,startSim,realtime,freezeSim," +
"dumpDMTCPChkpnt,dumpChkpntASCII,shutdownSim,loadChkpnt,lite,quit");
"dumpChkpntASCII,shutdownSim,loadChkpnt,lite,quit");
return actions.toArray(new String[0]);
}

View File

@ -349,32 +349,6 @@ public class SimControlActionController {
}
/**
 * Invoked when the user presses Dump DMTCP Chkpnt button.
 * Lets the user choose a file, then sends a trick.dmtcp_checkpoint command
 * carrying only the chosen file's name to the simulation. A notice is printed
 * if the chosen directory differs from {@code dir}. A failure to send the
 * command is reported on standard error instead of being silently dropped.
 *
 * @param dir directory to dump checkpoint
 * @param fileName name of checkpoint file
 * @param dialogParent dialogParent
 */
public void handleDumpDMTCPChkpnt(String dir, String fileName, Component dialogParent) {
    File selectedFile = UIUtils.chooseSaveFile(dir, fileName, null, dialogParent);
    if (selectedFile == null) {
        // No file was chosen; nothing to send.
        return;
    }

    String parentPath = selectedFile.getParent();
    if ( !dir.equals(parentPath) ) {
        System.out.println("Path was not updated! DMTCP checkpoints are saved in the --chkpt directory specified in s_main_dmtcp." );
    }

    try {
        simcom.put("trick.dmtcp_checkpoint(\"" + selectedFile.getName() + "\")\n");
    } catch (IOException e) {
        // Previously swallowed: the user got no indication that the command never reached the sim.
        System.err.println("Failed to send dmtcp_checkpoint command to the simulation: " + e.getMessage());
    }
}
/**
* Invoked when the user presses Shutdown button.
*/

View File

@ -56,8 +56,6 @@ realtime.Action.shortDescription = Realtime on/off
freezeSim.Action.text = Freeze
freezeSim.Action.shortDescription = Freeze/Stop the simulation
dumpDMTCPChkpnt.Action.text = Dump DMTCP Chkpnt
dumpDMTCPChkpnt.Action.shortDescription = Dump DMTCP Checkpoint
shutdownSim.Action.text = Shutdown
shutdownSim.Action.shortDescription = Shutdown Simulation

View File

@ -17,8 +17,6 @@ set( SS_SRC
Collect/collect
CommandLineArguments/CommandLineArguments
CommandLineArguments/command_line_c_intf
DMTCP/DMTCP
DMTCP/dmtcp_checkpoint_c_intf
DataRecord/DRAscii
DataRecord/DRBinary
DataRecord/DRHDF5

View File

@ -9,11 +9,6 @@
#include <sys/stat.h>
#include <string.h>
#ifdef _DMTCP
#include "dmtcpaware.h"
#endif
#include "trick/DMTCP.hh"
#include "trick/CheckPointRestart.hh"
#include "trick/MemoryManager.hh"
#include "trick/SimObject.hh"
@ -77,11 +72,6 @@ int Trick::CheckPointRestart::set_safestore_enabled(bool yes_no) {
return(0) ;
}
int Trick::CheckPointRestart::dmtcp_set_safestore_enabled(bool yes_no) {
dmtcp_safestore_enabled = yes_no ;
return(0) ;
}
int Trick::CheckPointRestart::set_cpu_num(int in_cpu_num) {
if ( in_cpu_num <= 0 ) {
cpu_num = -1 ;
@ -159,25 +149,6 @@ int Trick::CheckPointRestart::set_safestore_time(double in_time) {
return(0) ;
}
/**
 * Set the DMTCP safestore period and schedule the first safestore.
 *
 * The period is converted from seconds to time tics and, when it is not an
 * exact multiple of the software frame, rounded up to the next multiple.
 * The first safestore time is then set equal to the period.
 *
 * @param in_time - safestore interval in seconds; a warning is printed when it is below 10
 * @return always 0
 */
int Trick::CheckPointRestart::dmtcp_set_safestore_time(double in_time) {

    if ( in_time < 10 ) {
        std::cout << "\nA DMTCP Safestore Interval less than 10 seconds is not recommended.\n\n";
    }

    dmtcp_safestore_period = (long long)(in_time * exec_get_time_tic_value()) ;

    const long long software_frame_tics = exec_get_software_frame_tics() ;
    if ( dmtcp_safestore_period % software_frame_tics ) {
        // Round the *period* up to a whole number of software frames. The
        // previous code divided dmtcp_safestore_time (the old next-fire time)
        // here, which produced a period unrelated to the requested interval.
        dmtcp_safestore_period = ((dmtcp_safestore_period / software_frame_tics) + 1 ) * software_frame_tics ;
    }
    dmtcp_safestore_time = dmtcp_safestore_period ;

    return(0) ;
}
int Trick::CheckPointRestart::checkpoint(std::string file_name, bool print_status, std::string obj_list_str ) {
// first, empty the sim obj list to make sure there is nothing left from last time
@ -286,82 +257,6 @@ int Trick::CheckPointRestart::write_checkpoint() {
return(0) ;
}
/**
 * Queue the file name for a DMTCP checkpoint (no-op unless built with _DMTCP).
 *
 * An empty file_name is replaced with "dmtcp_chkpnt_init" while the sim is in
 * Initialization mode, otherwise with "dmtcp_chkpnt_" followed by the current
 * sim time printed with two decimals. The resulting name is handed to
 * dmtcp_job_queue().
 *
 * @param file_name - requested checkpoint name, or "" for the generated default
 */
void Trick::CheckPointRestart::setDMTCPFilename( std::string file_name __attribute__((unused))) {
#ifdef _DMTCP
    if ( file_name.empty() ) {
        std::stringstream default_name ;
        if ( exec_get_mode() == Initialization ) {
            default_name << "dmtcp_chkpnt_init" ;
        } else {
            default_name << "dmtcp_chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
        }
        file_name = default_name.str() ;
    }

    dmtcp_job_queue( file_name ) ;
#endif
}
/**
 * Request a DMTCP checkpoint under the given name (no-op unless built with _DMTCP).
 *
 * Queues the name, raises the "checkpoint now" flag, and calls call_dmtcp()
 * immediately unless the sim is in Initialization or Freeze mode.
 *
 * @param file_name - checkpoint name forwarded to setDMTCPFilename()
 * @return always 0
 */
int Trick::CheckPointRestart::dmtcp_checkpoint( std::string file_name __attribute__((unused))) {
#ifdef _DMTCP
    // Sample the mode before queueing, as the original code did.
    const SIM_MODE mode_at_request = exec_get_mode() ;

    setDMTCPFilename( file_name ) ;
    dmtcp_set_checkpoint_now() ;

    // Original author's note (DANNY), believed to work around a dmtcp bug:
    // a checkpoint requested from the sim control panel in freeze arrives via
    // the var server, which calls parse() in the input processor. IPParse
    // holds ip_mutex around that parse call, so calling call_dmtcp() here
    // could checkpoint while ip_mutex is still locked; after a dmtcp restart
    // the sim then hangs because ip_mutex cannot be unlocked. Therefore
    // call_dmtcp() is NOT issued here in freeze -- the dmtcp freeze job does
    // it instead. The var server is a thread, so this is technically still
    // vulnerable.
    const bool defer_to_job = (mode_at_request == Initialization) || (mode_at_request == Freeze) ;
    if ( !defer_to_job ) {
        call_dmtcp() ;
    }
#endif
    return 0 ;
}
/**
 * Schedule a DMTCP checkpoint at a simulation time (no-op unless built with _DMTCP).
 *
 * The time is converted to tics and pushed onto dmtcp_checkpoint_times only
 * when it is not earlier than the current time in tics.
 *
 * @param in_time - requested checkpoint time, multiplied by the time tic value
 * @return always 0
 */
int Trick::CheckPointRestart::dmtcp_checkpoint( double in_time __attribute__((unused))) {
#ifdef _DMTCP
    const long long now_tics = exec_get_time_tics() ;
    const long long requested_tics = (long long)(in_time * exec_get_time_tic_value()) ;

    if ( !(requested_tics < now_tics) ) {
        dmtcp_checkpoint_times.push( requested_tics ) ;
    }
#endif
    return 0 ;
}
/**
 * Fire any DMTCP checkpoint that is due at the current time (no-op unless built with _DMTCP).
 *
 * If the top entry of dmtcp_checkpoint_times equals the current time in tics,
 * one checkpoint is requested and every top entry equal to the current time
 * is popped. Independently, when safestore is enabled and due, a
 * "dmtcp_chkpnt_safestore" checkpoint is requested and the next safestore
 * time is advanced by one period.
 *
 * @return always 0
 */
int Trick::CheckPointRestart::write_dmtcp_checkpoint() {
#ifdef _DMTCP
    const long long now_tics = exec_get_time_tics() ;

    const bool scheduled_due = !dmtcp_checkpoint_times.empty() && (dmtcp_checkpoint_times.top() == now_tics) ;
    if ( scheduled_due ) {
        dmtcp_checkpoint() ;
        // Drop every request queued for this same instant.
        while ( !dmtcp_checkpoint_times.empty() && (dmtcp_checkpoint_times.top() == now_tics) ) {
            dmtcp_checkpoint_times.pop() ;
        }
    }

    if ( dmtcp_safestore_enabled && (now_tics == dmtcp_safestore_time) ) {
        dmtcp_checkpoint("dmtcp_chkpnt_safestore") ;
        dmtcp_safestore_time += dmtcp_safestore_period ;
    }
#endif
    return 0 ;
}
int Trick::CheckPointRestart::write_pre_init_checkpoint() {
if ( pre_init_checkpoint ) {
checkpoint(std::string("chkpnt_pre_init")) ;

View File

@ -70,15 +70,6 @@ extern "C" int checkpoint_safestore( int yes_no ) {
return(0) ;
}
/**
 * @relates Trick::CheckPointRestart
 * C wrapper that forwards to Trick::CheckPointRestart::dmtcp_set_safestore_enabled
 * on the global the_cpr object.
 * @param yes_no - converted to bool before being forwarded
 * @return always 0
 */
extern "C" int dmtcp_checkpoint_safestore( int yes_no ) {
    const bool enable = (yes_no != 0) ;
    the_cpr->dmtcp_set_safestore_enabled(enable) ;
    return 0 ;
}
/**
* @relates Trick::CheckPointRestart
* @copydoc Trick::CheckPointRestart::set_safestore_time
@ -88,15 +79,6 @@ extern "C" int checkpoint_safestore_period( double in_time ) {
return(0) ;
}
/**
 * @relates Trick::CheckPointRestart
 * C wrapper that forwards to Trick::CheckPointRestart::dmtcp_set_safestore_time
 * on the global the_cpr object.
 * @param in_time - value passed through unchanged
 * @return always 0 (the callee's return value is discarded)
 */
extern "C" int dmtcp_checkpoint_safestore_period( double in_time ) {
    (void)the_cpr->dmtcp_set_safestore_time(in_time) ;
    return 0 ;
}
/**
* @relates Trick::CheckPointRestart
* @copydoc Trick::CheckPointRestart::set_cpu_num
@ -130,21 +112,6 @@ extern "C" const char * checkpoint_get_load_file() {
}
/**
 * @relates Trick::CheckPointRestart
 @brief @userdesc Command to dump a checkpoint now to the specified file.
 This is invoked when the user clicks the "Dump DMTCP Chkpnt" button on the sim control panel.
 @par Python Usage:
 @code trick.dmtcp_checkpoint("<file_name>") @endcode
 @param file_name - name of checkpoint file to dump (leave blank and Trick will use filename "dmtcp_chkpnt_<simtime>");
                    a NULL pointer is treated the same as a blank name
 @return always 0
 */
extern "C" int dmtcp_checkpoint( const char * file_name __attribute__((unused))) {
    // Constructing std::string from a null pointer is undefined behaviour, so
    // map NULL to the empty string, which the callee treats as "use default".
    the_cpr->dmtcp_checkpoint( std::string( (file_name != NULL) ? file_name : "" ) );
    return(0) ;
}
/**
* @relates Trick::CheckPointRestart
@brief @userdesc Command to dump a checkpoint now to the specified file.

View File

@ -2,6 +2,3 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefi
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
-include Makefile_deps
ifneq ($(DMTCP),)
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
endif

View File

@ -20,8 +20,6 @@ object_${TRICK_HOST_CPU}/next_attr_name.o: next_attr_name.cpp \
${TRICK_HOME}/include/trick/CheckPointRestart_c_intf.hh
object_${TRICK_HOST_CPU}/stl_type_name_convert.o: stl_type_name_convert.cpp
object_${TRICK_HOST_CPU}/CheckPointRestart.o: CheckPointRestart.cpp \
${TRICK_HOME}/include/trick/DMTCP.hh \
${TRICK_HOME}/include/trick/dmtcp_checkpoint_c_intf.hh \
${TRICK_HOME}/include/trick/Scheduler.hh \
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
${TRICK_HOME}/include/trick/JobData.hh \

View File

@ -2,6 +2,3 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefi
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
-include Makefile_deps
ifneq ($(DMTCP),)
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
endif

View File

@ -1,333 +0,0 @@
#include <iostream>
#include <libgen.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dlfcn.h>
#include <stdlib.h>
#include <string.h>
#include <sstream>
#include <fstream>
#include <iomanip>
#ifdef _DMTCP
#include "dmtcpaware.h"
#endif
#include "trick/DMTCP.hh"
#include "trick/SimObject.hh"
#include "trick/exec_proto.h"
#include "trick/message_proto.h"
#include "trick/Executive.hh"
#include "trick/CheckPointRestart.hh"
using namespace std;
Trick::DMTCP * the_dmtcp ;
/**
 * Construct the DMTCP scheduler: clear the checkpoint flags, register the
 * "dmtcp_pre", "dmtcp_post" and "dmtcp_restart" job classes (ids 0, 1, 2)
 * with their queues, and publish this object through the_dmtcp.
 */
Trick::DMTCP::DMTCP() {
    checkpoint_now = false ;
    dmtcp_ckpt_complete = 0 ;

    int class_id = 0 ;

    class_map["dmtcp_pre"] = class_id ;
    class_to_queue[class_id] = &pre_queue ;
    ++class_id ;

    class_map["dmtcp_post"] = class_id ;
    class_to_queue[class_id] = &post_queue ;
    ++class_id ;

    class_map["dmtcp_restart"] = class_id ;
    class_to_queue[class_id] = &restart_queue ;
    ++class_id ;

    the_dmtcp = this ;
}
/**
 * Initialization job: when built with _DMTCP and DMTCP reports itself enabled,
 * install the pre/post/restart checkpoint hooks; then service any pending
 * checkpoint request through call_dmtcp().
 * @return always 0
 */
int Trick::DMTCP::init() {
#ifdef _DMTCP
    if ( dmtcpIsEnabled() ) {
        dmtcpInstallHooks(dmtcp_pre_checkpoint, dmtcp_post_checkpoint, dmtcp_restart) ;
    }
#endif
    call_dmtcp() ;
    return 0 ;
}
/**
 * Freeze job: services any pending checkpoint request via call_dmtcp().
 * @return always 0 (the result of call_dmtcp() is not propagated)
 */
int Trick::DMTCP::freeze() {
    (void)call_dmtcp() ;
    return 0 ;
}
int Trick::DMTCP::pre() {
JobData * curr_job ;
pre_queue.reset_curr_index() ;
while ( (curr_job = pre_queue.get_next_job()) != NULL ) {
curr_job->call() ;
}
return 0 ;
}
int Trick::DMTCP::post() {
JobData * curr_job ;
renameRestartScript();
post_queue.reset_curr_index() ;
while ( (curr_job = post_queue.get_next_job()) != NULL )
curr_job->call() ;
return 0 ;
}
int Trick::DMTCP::restart() {
JobData * curr_job ;
restart_queue.reset_curr_index() ;
while ( (curr_job = restart_queue.get_next_job()) != NULL )
curr_job->call() ;
while ( !dmtcp_checkpoint_jobs_queue.empty())
dmtcp_checkpoint_jobs_queue.pop();
return 0 ;
}
/**
 * Perform a checkpoint if one has been requested.
 *
 * When checkpoint_now is set: clear dmtcp_ckpt_complete, sleep one second,
 * issue the DMTCP 'c' command (only when built with _DMTCP and DMTCP is
 * enabled), then clear checkpoint_now. Otherwise do nothing.
 *
 * @return always 0
 */
int Trick::DMTCP::call_dmtcp() {
    if ( checkpoint_now != true ) {
        return 0 ;
    }

    dmtcp_ckpt_complete = 0 ;

    // Per the original note: this sleep() is required to give the flag enough
    // time to refresh in Trick View, and it does not cause overruns.
    sleep(1) ;

#ifdef _DMTCP
    if ( dmtcpIsEnabled() ) {
        dmtcpRunCommand('c') ;
    }
#endif

    checkpoint_now = false ;
    return 0 ;
}
void Trick::DMTCP::dmtcpSetCheckpointNow() {
checkpoint_now = true ;
}
/**
 * Return the part of str after its last '/' or '\\'.
 * When str contains neither separator the whole string is returned
 * (find_last_of yields npos, and npos + 1 wraps to 0).
 * @param str - path-like string
 * @return trailing file-name component
 */
std::string Trick::DMTCP::splitFilename( const string& str ) {
    const size_t last_separator = str.find_last_of("/\\") ;
    return str.substr(last_separator + 1) ;
}
/**
 * Report whether str contains any character outside the accepted set
 * (ASCII letters, digits, and "_.-#%@=:^").
 * @param str - candidate name
 * @return true if at least one character is not in the accepted set
 */
bool Trick::DMTCP::isSpecialCharacter( const std::string& str) {
    return str.find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_.-#%@=:^" ) != string::npos ;
}
/**
 * Push file_name onto dmtcp_checkpoint_jobs_queue. Afterwards, if the queue
 * holds two or more entries and file_name is not "dmtcp_chkpnt_safestore",
 * pop one entry (which entry pop() removes depends on the container type,
 * declared outside this file section).
 * @param file_name - checkpoint name to queue
 */
void Trick::DMTCP::dmtcpJobQueue( std::string file_name ) {
    dmtcp_checkpoint_jobs_queue.push( file_name ) ;

    const bool is_safestore = ( strcmp(file_name.c_str(), "dmtcp_chkpnt_safestore") == 0 ) ;
    if ( (dmtcp_checkpoint_jobs_queue.size() >= 2) && !is_safestore ) {
        dmtcp_checkpoint_jobs_queue.pop() ;
    }
}
/**
 * Choose the name for the DMTCP restart script (empty unless built with _DMTCP).
 *
 * The candidate is the top entry of dmtcp_checkpoint_jobs_queue. Any leading
 * directory is stripped (with a notice), and a name containing characters
 * rejected by isSpecialCharacter() is replaced by the default
 * "dmtcp_chkpnt_<sim time, 2 decimals>". When no name has been queued the
 * default is returned directly.
 *
 * @return restart script name, or "" when built without _DMTCP
 */
std::string Trick::DMTCP::getScriptName() {

    std::string restart_script_name ;

#ifdef _DMTCP
    std::stringstream default_script_name ;

    // Default script name
    default_script_name << "dmtcp_chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;

    // Calling top() on an empty container is undefined behaviour; fall back
    // to the default name when nothing has been queued.
    if ( dmtcp_checkpoint_jobs_queue.empty() ) {
        return default_script_name.str() ;
    }

    // Script name from memory manager wrapper (input file or gui)
    std::string mmw_script_name = dmtcp_checkpoint_jobs_queue.top();

    // User specified a new directory
    if ( strstr(mmw_script_name.c_str(),"/") != NULL ){
        std::string stripped_name = splitFilename( mmw_script_name );
        std::cout << "DMTCP checkpoints are written to the --ckptdir directory (defined in $TRICK_HOME/bin/s_define_dmtcp).\n";
        mmw_script_name = stripped_name;
    }

    // User specified a valid script name (or is using default script name)
    if ( ( mmw_script_name == default_script_name.str() ) or ( !isSpecialCharacter( mmw_script_name ) ) )
        restart_script_name = mmw_script_name;
    // User specified an invalid script name
    else {
        std::cout << mmw_script_name.c_str() << " is not a valid name. The default DMTCP script name will be used: " << default_script_name.str() << endl;
        restart_script_name = default_script_name.str();
    }
#endif

    return restart_script_name;
}
/**
 * Rename the restart script that DMTCP generated to the Trick-chosen name
 * (no-op unless built with _DMTCP).
 *
 * Builds two "mv" shell commands -- one for a script located under lsp->env
 * and one for a script located in the current directory -- and runs whichever
 * source file can be opened. It then calls dmtcpSafestoreDir(), waits until
 * the "*.dmtcp.temp" file is no longer accessible, sets dmtcp_ckpt_complete
 * and publishes a "Dumped DMTCP Checkpoint" message.
 */
void Trick::DMTCP::dmtcpRenameCmd() {
#ifdef _DMTCP
std::stringstream dmtcp_rename_command;
std::stringstream no_env_dmtcp_rename_command;
std::stringstream trick_restart_script;
std::stringstream dmtcp_restart_script;
std::stringstream no_env_dmtcp_restart_script;
// Target name for the script, as selected by getScriptName().
std::string mmw_filename = getScriptName();
const DmtcpLocalStatus * lsp = dmtcpGetLocalStatus() ;
// The generated script name carries a zero-padded 5-digit genId suffix only
// when genId is non-zero.
if (lsp->genId) {
dmtcp_restart_script << lsp->env << "/dmtcp_restart_script_" << lsp->uniquePidStr << "_" << std::setfill('0') << std::setw(5) << lsp->genId << ".sh";
no_env_dmtcp_restart_script << "./dmtcp_restart_script_" << lsp->uniquePidStr << "_" << std::setfill('0') << std::setw(5) << lsp->genId << ".sh";
}
else {
dmtcp_restart_script << lsp->env << "/dmtcp_restart_script_" << lsp->uniquePidStr << ".sh";
no_env_dmtcp_restart_script << "./dmtcp_restart_script_" << lsp->uniquePidStr << ".sh";
}
// Destination is always <lsp->env>/<chosen name>.
trick_restart_script << lsp->env << "/" << mmw_filename;
dmtcp_rename_command << "mv " << dmtcp_restart_script.str() << " " << trick_restart_script.str();
// The current-directory variant additionally removes ./dmtcp_restart_script.sh.
no_env_dmtcp_rename_command << "mv " << no_env_dmtcp_restart_script.str() << " " << trick_restart_script.str() << "; rm -f ./dmtcp_restart_script.sh";
// Only issue the rename when the source script can actually be opened.
ifstream dmtcp_file( dmtcp_restart_script.str().c_str() );
if ( dmtcp_file.good() ) {
dmtcpSystemCmd( dmtcp_rename_command.str() );
}
// There is a bug in DMTCP that causes the DMTCP Coordinator (see dmtcp::DmtcpCoordinator::writeRestartScript())
// to not recognize ENV_VAR_CHECKPOINT_DIR when dumping a checkpoint from a restarted checkpoint.
// As a result, DMTCP writes the restart script to the current directory.
// This code just checks to see if it's in the current dir, if so, move it to the ENV_VAR_CHECKPOINT_DIR with the new script name.
ifstream no_env_dmtcp_file( no_env_dmtcp_restart_script.str().c_str() );
if ( no_env_dmtcp_file.good() ) {
dmtcpSystemCmd( no_env_dmtcp_rename_command.str() );
}
dmtcpSafestoreDir();
// Don't print out "dumped" message until checkpoint file (ckpt_*.dmtcp) is written
std::stringstream dmtcp_temp;
// NOTE(review): getenv("TRICK_HOST_CPU") is streamed without a null check -- confirm it is always set.
dmtcp_temp << lsp->env << "/ckpt_" << lsp->uniquePidStr << "_" << std::setfill('0') << std::setw(5) << lsp->genId << "/ckpt_S_main_" << getenv("TRICK_HOST_CPU") << ".exe_" << lsp->uniquePidStr << ".dmtcp.temp";
// Spin (no sleep) for as long as the temporary checkpoint file is still accessible.
while( access( dmtcp_temp.str().c_str(),F_OK ) == 0 ) {}
dmtcp_ckpt_complete = 1;
message_publish(MSG_INFO, "Dumped DMTCP Checkpoint: %s\n", mmw_filename.c_str()) ;
#endif
}
/**
 * Run a shell command, preferring the "_real_system" symbol when the running
 * program exports one and falling back to system() otherwise.
 *
 * @param str - command line to execute
 */
void Trick::DMTCP::dmtcpSystemCmd( const string& str ) {
    typedef void (*real_system_fn)(const char *) ;

    // Look the symbol up in the main program's global symbol table.
    void* dlhandle = dlopen( NULL, RTLD_LAZY) ;
    real_system_fn real_system_ptr = (real_system_fn)dlsym( dlhandle , "_real_system" ) ;

    if ( real_system_ptr != NULL )
        (*real_system_ptr)( str.c_str() ) ;
    else
        system( str.c_str() );

    // Release the handle once the call has completed; the other _real_system
    // call sites in this code base do the same, and without it every call
    // leaves one more reference on the handle.
    if ( dlhandle != NULL )
        dlclose( dlhandle ) ;
}
/**
 * For safestore checkpoints, keep only the newest ckpt_* directory by removing
 * the previously recorded one. Without _DMTCP this just clears
 * prev_checkpoint_dirname.
 *
 * NOTE(review): restart_script_name is never assigned in this function, so
 * the safestore branch is not taken as written -- confirm whether it was meant
 * to be initialised from getScriptName().
 */
void Trick::DMTCP::dmtcpSafestoreDir() {
#ifdef _DMTCP
    std::string restart_script_name;
    const DmtcpLocalStatus * lsp = dmtcpGetLocalStatus() ;

    // dirname() may modify its argument, so operate on a private copy.
    char* long_checkpoint_name = strdup(lsp->checkpointFilename) ;
    if ( long_checkpoint_name == NULL ) {
        return ;
    }

    // For DMTCP Safestore checkpoints, only keep newest ckpt_* directory.
    // Compare by value: comparing c_str() against a literal with == compares
    // pointer addresses, not the characters.
    if ( restart_script_name == "dmtcp_chkpnt_safestore" ) {
        char* checkpoint_dirname = dirname(long_checkpoint_name) ;

        // If there's a new ckpt_* directory, remove the previous version
        if ( strcmp(prev_checkpoint_dirname, checkpoint_dirname) ) {
            char rm_safestore_dir[512] ;
            // Bounded write so long paths cannot overrun the buffer.
            snprintf( rm_safestore_dir, sizeof(rm_safestore_dir), "cd %s; rm -rf %s", lsp->env, prev_checkpoint_dirname);
            dmtcpSystemCmd( rm_safestore_dir );
        }
        strcpy( prev_checkpoint_dirname, checkpoint_dirname );
    }

    // checkpoint_dirname may point into this buffer, so free it only after
    // the last use above. The original never released it.
    free( long_checkpoint_name ) ;
#else
    strcpy( prev_checkpoint_dirname, "" );
#endif
}
/**
 * Post-checkpoint cleanup (no-op unless built with _DMTCP): unlink
 * "<lsp->env>/dmtcp_restart_script.sh" and pop one entry from
 * dmtcp_checkpoint_jobs_queue when it is not empty.
 */
void Trick::DMTCP::dmtcpCleanup() {
#ifdef _DMTCP
    const DmtcpLocalStatus * lsp = dmtcpGetLocalStatus() ;

    // Build the path in a std::string: the previous sprintf into a fixed
    // 1024-byte buffer could overflow for a long checkpoint directory.
    const std::string dmtcp_restart_script_sh = std::string(lsp->env) + "/dmtcp_restart_script.sh" ;
    unlink( dmtcp_restart_script_sh.c_str() ) ;

    if ( !dmtcp_checkpoint_jobs_queue.empty() )
        dmtcp_checkpoint_jobs_queue.pop();
#endif
}
/**
 * When built with _DMTCP and DMTCP is enabled, rename the generated restart
 * script (dmtcpRenameCmd) and then clean up (dmtcpCleanup). Otherwise a no-op.
 */
void Trick::DMTCP::renameRestartScript() {
#ifdef _DMTCP
    if ( !dmtcpIsEnabled() ) {
        return ;
    }
    dmtcpRenameCmd() ;
    dmtcpCleanup() ;
#endif
}
/**
 * Write the "DMTCP :" section header followed by the pre, post and restart
 * queues (via write_non_sched_queue) to fp.
 * @param fp - output stream; nothing is written when NULL
 * @return always 0
 */
int Trick::DMTCP::write_s_job_execution( FILE * fp ) {
    if ( fp != NULL ) {
        fprintf(fp, "\n===================================================================================================\n") ;
        fprintf(fp, "DMTCP :\n\n") ;

        write_non_sched_queue(fp, &pre_queue) ;
        write_non_sched_queue(fp, &post_queue) ;
        write_non_sched_queue(fp, &restart_queue) ;
    }
    return 0 ;
}
/**
 * Intentionally empty: the argument is ignored and nothing is instrumented.
 * @return always 0
 */
int Trick::DMTCP::instrument_job_before(Trick::JobData * instrument_job __attribute__((unused)) ) {
    const int status = 0 ;
    return status ;
}
/**
 * Intentionally empty: the argument is ignored and nothing is instrumented.
 * @return always 0
 */
int Trick::DMTCP::instrument_job_after(Trick::JobData * instrument_job __attribute__((unused)) ) {
    const int status = 0 ;
    return status ;
}
/**
 * Intentionally empty: the job name is ignored and nothing is removed.
 * @return always 0
 */
int Trick::DMTCP::instrument_job_remove(std::string in_job __attribute__((unused)) ) {
    const int status = 0 ;
    return status ;
}

View File

@ -1,7 +0,0 @@
include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefile.common
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
-include Makefile_deps
ifneq ($(DMTCP),)
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
endif

View File

@ -1,27 +0,0 @@
object_${TRICK_HOST_CPU}/DMTCP.o: DMTCP.cpp ${TRICK_HOME}/include/trick/DMTCP.hh \
${TRICK_HOME}/include/trick/dmtcp_checkpoint_c_intf.hh \
${TRICK_HOME}/include/trick/Scheduler.hh \
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
${TRICK_HOME}/include/trick/JobData.hh \
${TRICK_HOME}/include/trick/InstrumentBase.hh \
${TRICK_HOME}/include/trick/SimObject.hh \
${TRICK_HOME}/include/trick/exec_proto.h \
${TRICK_HOME}/include/trick/sim_mode.h \
${TRICK_HOME}/include/trick/message_proto.h \
${TRICK_HOME}/include/trick/message_type.h \
${TRICK_HOME}/include/trick/Executive.hh \
${TRICK_HOME}/include/trick/Scheduler.hh \
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
${TRICK_HOME}/include/trick/SimObject.hh \
${TRICK_HOME}/include/trick/Threads.hh \
${TRICK_HOME}/include/trick/ThreadBase.hh \
${TRICK_HOME}/include/trick/sim_mode.h \
${TRICK_HOME}/include/trick/CheckPointRestart.hh
object_${TRICK_HOST_CPU}/dmtcp_checkpoint_c_intf.o: dmtcp_checkpoint_c_intf.cpp \
${TRICK_HOME}/include/trick/DMTCP.hh \
${TRICK_HOME}/include/trick/dmtcp_checkpoint_c_intf.hh \
${TRICK_HOME}/include/trick/Scheduler.hh \
${TRICK_HOME}/include/trick/ScheduledJobQueue.hh \
${TRICK_HOME}/include/trick/JobData.hh \
${TRICK_HOME}/include/trick/InstrumentBase.hh \
${TRICK_HOME}/include/trick/SimObject.hh

View File

@ -1,69 +0,0 @@
#include <iostream>
#ifdef _DMTCP
#include "dmtcpaware.h"
#endif
#include "trick/DMTCP.hh"
/** Global singleton pointer to DMTCP */
extern Trick::DMTCP * the_dmtcp ;
#ifdef _DMTCP
/**
 * @relates Trick::DMTCP
 * C-linkage wrapper returning the result of dmtcpIsEnabled().
 * @return whatever dmtcpIsEnabled() returns
 */
extern "C" int dmtcp_is_enabled() {
    const int enabled = dmtcpIsEnabled() ;
    return enabled ;
}
/**
 * @relates Trick::DMTCP
 * C-linkage hook that runs Trick::DMTCP::pre() on the global the_dmtcp object;
 * the return value is discarded.
 */
extern "C" void dmtcp_pre_checkpoint() {
    (void)the_dmtcp->pre() ;
}
/**
 * @relates Trick::DMTCP
 * C-linkage hook that runs Trick::DMTCP::post() on the global the_dmtcp object;
 * the return value is discarded.
 */
extern "C" void dmtcp_post_checkpoint() {
    (void)the_dmtcp->post() ;
}
/**
 * @relates Trick::DMTCP
 * C-linkage hook that runs Trick::DMTCP::restart() on the global the_dmtcp
 * object; the return value is discarded.
 */
extern "C" void dmtcp_restart() {
    (void)the_dmtcp->restart() ;
}
/**
 * @relates Trick::DMTCP
 * @copydoc Trick::DMTCP::call_dmtcp()
 * C-linkage wrapper around Trick::DMTCP::call_dmtcp() on the global the_dmtcp
 * object.
 * @return the value returned by Trick::DMTCP::call_dmtcp()
 */
extern "C" int call_dmtcp() {
    // Flowing off the end of a non-void function is undefined behaviour, so
    // propagate the callee's result explicitly.
    return the_dmtcp->call_dmtcp();
}
/**
* @relates Trick::DMTCP
* @copydoc Trick::DMTCP::dmtcpSetCheckpointNow()
*/
extern "C" void dmtcp_set_checkpoint_now() {
the_dmtcp->dmtcpSetCheckpointNow() ;
}
/**
* @relates Trick::DMTCP
* @copydoc Trick::DMTCP::dmtcpJobQueue()
*/
extern "C" void dmtcp_job_queue( std::string file_name ) {
the_dmtcp->dmtcpJobQueue( file_name );
}
#endif

View File

@ -15,11 +15,6 @@
#include <sys/syscall.h>
#endif
#if _DMTCP
#include <dlfcn.h>
#include "dmtcpaware.h"
#endif
#include "trick/DataRecordDispatcher.hh"
#include "trick/exec_proto.h"
#include "trick/exec_proto.hh"
@ -80,27 +75,7 @@ int Trick::DataRecordDispatcher::remove_files() {
std::string command;
command = std::string("/bin/rm -rf ") + command_line_args_get_output_dir() + std::string("/log_*") ;
#ifdef _DMTCP
if( dmtcpIsEnabled() ) {
std::string real_system_name ;
void* dlhandle ;
void (*real_system_ptr)(const char *) = NULL ;
dlhandle = dlopen( NULL, RTLD_LAZY) ;
real_system_name = "_real_system" ;
real_system_ptr = (void (*)(const char *))dlsym( dlhandle , real_system_name.c_str()) ;
if ( real_system_ptr != NULL ) {
printf("\nDataRecordDispatcher::remove_files() calling DMTCP _real_system \"%s\"\n" , command.c_str()) ;
(*real_system_ptr)(command.c_str()) ;
} else {
system(command.c_str());
}
dlclose(dlhandle) ;
} else {
system(command.c_str());
}
#else
system(command.c_str());
#endif
return 0 ;
}
@ -254,19 +229,6 @@ int Trick::DataRecordDispatcher::restart() {
return 0 ;
}
/**
@details
-# Call dmtcp_restart() on every group in groups, in index order.
-# Always return 0; the per-group return values are not inspected.
*/
int Trick::DataRecordDispatcher::dmtcp_restart() {
    unsigned int group_index = 0 ;
    while ( group_index < groups.size() ) {
        groups[group_index]->dmtcp_restart() ;
        ++group_index ;
    }
    return 0 ;
}
/**
@details
-# If the thread was started,

View File

@ -107,9 +107,6 @@ Trick::DataRecordGroup::DataRecordGroup( std::string in_name ) :
add_job(0, 5, (char *)"restart", NULL, 1.0, (char *)"restart", (char *)"TRK", 60001) ;
add_job(0, 6, (char *)"shutdown", NULL, 1.0, (char *)"shutdown", (char *)"TRK") ;
// (Alex 1/15/14) The dmtcp_restart job is called by the DataRecordDispatcher... is the
// dispatcher necessary anymore?
write_job = add_job(0, 99, (char *)job_class.c_str(), NULL, cycle, (char *)"data_record" , (char *)"TRK") ;
add_time_variable() ;
@ -527,14 +524,6 @@ int Trick::DataRecordGroup::restart() {
return 0 ;
}
/**
@details
-# This restart is targeted at DMTCP restarts, where only the header needs to be rewritten.
-# Returns whatever write_header() returns.
*/
int Trick::DataRecordGroup::dmtcp_restart() {
    const int header_status = write_header() ;
    return header_status ;
}
int Trick::DataRecordGroup::write_header() {
unsigned int jj ;
@ -546,9 +535,6 @@ int Trick::DataRecordGroup::write_header() {
out_stream.open(header_name.c_str(), std::fstream::out ) ;
if ( ! out_stream || ! out_stream.good() ) {
#ifndef _DMTCP
message_publish(MSG_ERROR, "Can't open Data Record file %s.\n", header_name.c_str()) ;
#endif
return -1;
}

View File

@ -11,7 +11,3 @@ $(OBJ_DIR)/io_DRHDF5.o: TRICK_CXXFLAGS += -I$(HDF5)/include
endif
TRICK_CXXFLAGS += -DHDF5
endif
ifneq ($(DMTCP),)
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
endif

View File

@ -6,10 +6,6 @@
#include <vector>
#include <cstring>
#if _DMTCP
#include "dmtcpaware.h"
#endif
#include "trick/ExternalApplication.hh"
#include "trick/ExternalApplicationManager.hh"
#include "trick/variable_server_proto.h"
@ -170,45 +166,10 @@ void Trick::ExternalApplication::launch() {
argv = command_line_args_get_argv() ;
oss << command << " " << arguments.str() << " " << create_arguments_string() ;
if (argc > 2) {
for (int i=0;i<argc;i++) {
if (!strcmp(argv[i], "dmtcp")) {
oss << " -dmtcp" ;
break ;
}
}
}
oss << " &";
#ifdef _DMTCP
if( dmtcpIsEnabled() ) {
std::string real_system_name ;
void* dlhandle ;
void (*real_system_ptr)(const char *) = NULL ;
dlhandle = dlopen( NULL, RTLD_LAZY) ;
real_system_name = "_real_system" ;
real_system_ptr = (void (*)(const char *))dlsym( dlhandle , real_system_name.c_str()) ;
if ( real_system_ptr != NULL ) {
std::cout << "\nExternalApplication::launch() calling DMTCP _real_system \"" << oss.str() << "\n";
(*real_system_ptr)(oss.str().c_str()) ;
} else {
std::cout << "calling DMTCP \"" << oss.str().c_str() << "\"" << std::endl;
system(oss.str().c_str());
}
dlclose(dlhandle) ;
} else {
system(oss.str().c_str());
}
#else
std::cout << oss.str() << std::endl;
system(oss.str().c_str());
#endif
}
}

View File

@ -1,7 +1,3 @@
include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefile.common
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
-include Makefile_deps
ifneq ($(DMTCP),)
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
endif

View File

@ -90,7 +90,6 @@ int Trick::MSSharedMem::connect() {
tsm_dev.size = sizeof(MSSharedMemData);
ret = tsm_init(&tsm_dev);
} else {
// handle reconnecting for dmtcp restart
ret = tsm_reconnect(&tsm_dev);
}
shm_addr = (MSSharedMemData*) tsm_dev.addr;

View File

@ -48,14 +48,6 @@ std::string Trick::MSSocket::add_sim_args( std::string slave_type ) {
/** @li create a unique identifier based on the <machine>_<current pid>. */
gethostname(master_host, (size_t) 80);
/** @li if master is running with dmtcp slave or vice versa, use "_dmtcp_multiconnect_tag"
for sync_port_tag. on restart, dmtcp will retain original slave pid, which will not
match restarted master pid. sync_port_tag must match for master and slave(s) to
connect/reconnect via tc_multiconnect() */
if (slave_type == "dmtcp")
temp_stream << master_host << "_dmtcp_multiconnect_tag" ;
else
temp_stream << master_host << "_" << getpid() ;
sync_port_tag = temp_stream.str() ;

View File

@ -1,7 +1,3 @@
include $(dir $(lastword $(MAKEFILE_LIST)))../../../share/trick/makefiles/Makefile.common
include ${TRICK_HOME}/share/trick/makefiles/Makefile.tricklib
-include Makefile_deps
ifneq ($(DMTCP),)
TRICK_CXXFLAGS += -D_DMTCP -I$(DMTCP)/dmtcpaware
endif

View File

@ -47,7 +47,6 @@ Trick::SlaveInfo::SlaveInfo() {
reconnect_count = 0;
chkpnt_dump_auto = true ;
chkpnt_load_auto = true ;
chkpnt_binary = false ;
}
int Trick::SlaveInfo::set_connection_type(Trick::MSConnect * in_connection) {
@ -145,10 +144,6 @@ int Trick::SlaveInfo::start() {
if ( ! run_input_file.empty() ) {
startup_command << " " << run_input_file ;
/** @li check to see if master is running with dmtcp slave */
if (run_input_file.find("dmtcp") != std::string::npos)
slave_type = "dmtcp";
}
/** @li Add the connection specific arguments to the startup command */
@ -234,15 +229,11 @@ int Trick::SlaveInfo::read_slave_status() {
}
else {
message_publish(MSG_WARNING, "Slave is exiting.\n") ;
// if reconnect_wait_limit is set, master waits for slave to reconnect (e.g. dmtcp restarting)
// if reconnect_wait_limit is set, master waits for slave to reconnect
if (reconnect_wait_limit > 0.0) {
message_publish(MSG_WARNING, "Master will wait %f seconds for slave to reconnect.\n", reconnect_wait_limit) ;
// make reads (shared mem connection) return quickly so we don't overrun waiting for reconnect
// TODO: for socket connection we will overrun in the accept call (see restart_dmtcp_slave)
connection->set_sync_wait_limit(exec_get_freeze_frame());
if (chkpnt_binary) {
restart_dmtcp_slave(); // restart the slave dmtcp executable
}
}
else {
message_publish(MSG_WARNING, "reconnect_wait_limit: 0.0 - Master will stop communicating with slave.\n") ;
@ -251,13 +242,6 @@ int Trick::SlaveInfo::read_slave_status() {
return(0) ;
}
break ;
case (MS_ChkpntLoadBinCmd):
// slave has received our load command and is now sending us his dmtcp port and checkpoint file name
dmtcp_port = connection->read_port() ;
connection->read_name(chkpnt_name, sizeof(chkpnt_name)); // dir/filename
message_publish(MSG_WARNING , "Master received DMTCP Port and Checkpoint Filename from slave.\n");
connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ; // send this as an ack so slove can shut down
break ;
case (MS_FreezeCmd):
/** @li if the current slave is freezing, freeze the master too */
message_publish(MSG_INFO, "Slave is freezing.\n") ;
@ -288,12 +272,6 @@ int Trick::SlaveInfo::write_master_status() {
/** @li write the current exec_command according to the master to the slave */
connection->write_command((MS_SIM_COMMAND)exec_get_exec_command()) ;
}
if ((MS_SIM_COMMAND)exec_get_exec_command() == MS_ChkpntLoadBinCmd) {
// dmtcp slave will exit, so stop writing status to slave until it reconnects
// reconnect_count prevents us from writing status to slave, & is incremented every freeze cycle until we have reconnected
reconnect_count = 1;
}
return(0) ;
}
@ -460,22 +438,10 @@ int Trick::Master::checkpoint() {
std::string full_path_name = checkpoint_get_output_file();
for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
if (slaves[ii]->chkpnt_dump_auto) {
if (slaves[ii]->chkpnt_binary) {
if (slaves[ii]->slave_type == "dmtcp") {
exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpBinCmd) ;
slaves[ii]->write_master_status() ;
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
exec_set_exec_command(save_command) ;
} else {
message_publish(MSG_ERROR, "Slave is not running under dmtcp control so it cannot dump binary checkpoint.\n") ;
slaves[ii]->write_master_status() ;
}
} else { // ascii
exec_set_exec_command((SIM_COMMAND)MS_ChkpntDumpAsciiCmd) ;
slaves[ii]->write_master_status() ;
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
exec_set_exec_command(save_command) ;
}
} else { // no auto dump
slaves[ii]->write_master_status() ;
}
@ -496,22 +462,10 @@ int Trick::Master::preload_checkpoint() {
std::string full_path_name = checkpoint_get_load_file();
for ( ii = 0 ; ii < slaves.size() ; ii++ ) {
if (slaves[ii]->chkpnt_load_auto) {
if (slaves[ii]->chkpnt_binary) {
if (slaves[ii]->slave_type == "dmtcp") {
exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadBinCmd) ;
slaves[ii]->write_master_status() ;
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
exec_set_exec_command(save_command) ;
} else {
message_publish(MSG_ERROR, "Slave is not running under dmtcp control so it cannot load binary checkpoint.\n") ;
slaves[ii]->write_master_status() ;
}
} else { // ascii
exec_set_exec_command((SIM_COMMAND)MS_ChkpntLoadAsciiCmd) ;
slaves[ii]->write_master_status() ;
slaves[ii]->write_master_chkpnt_name(full_path_name) ;
exec_set_exec_command(save_command) ;
}
} else { // no auto load
slaves[ii]->write_master_status() ;
}
@ -549,92 +503,6 @@ int Trick::Master::shutdown() {
return(0) ;
}
/**
 * Restart a slave that was running under DMTCP control (no-op unless built
 * with _DMTCP or when the slave is not enabled).
 *
 * When chkpnt_load_auto is set and a checkpoint name was received, this
 * first tries to quit the slave's dmtcp_coordinator (if dmtcp_port > 0) by
 * running "<DMTCP path>/bin/dmtcp_command --quiet -p <port> q", then launches
 * the slave's checkpoint script in a detached child process. For a socket
 * connection it finally disconnects and waits in accept() for the slave.
 *
 * @return always 0
 */
int Trick::SlaveInfo::restart_dmtcp_slave() {
#ifdef _DMTCP
    pid_t pid, dmtcp_pid;

    /** @par Detailed Design: */
    if ( enabled ) {
        if (slave_type != "dmtcp") {
            message_publish(MSG_ERROR, "Cannot auto-start slave because it was not running under dmtcp control.\n") ;
            return(0);
        }
        /** @li If chkpnt_load_auto is specified, restart the slave by executing the user-supplied chkpnt_name... */
        if (chkpnt_load_auto) {
            if (chkpnt_name[0] == MS_ERROR_NAME) {
                message_publish(MSG_WARNING, "Cannot auto-start slave because master did not receive chkpnt_name from slave.\n");
            } else {
                /** @li First kill slave's dmtcp_coordinator because sometimes it does not quit like it's supposed to. */
                if (dmtcp_port > 0) { // slave sends 0 if it can't get the port num from the environment
                    /** @li Get dmtcp path from trick's configure output file (dmtcp is only supported in linux). */
                    // dmtcp_home stays empty when TRICK_HOME is unset, the
                    // config file cannot be opened, or it has no usable DMTCP
                    // line. The original left the path pointer uninitialised
                    // (or NULL) in those cases and then dereferenced it.
                    std::string dmtcp_home;
                    const char * trick_home = getenv("TRICK_HOME");
                    if (trick_home != NULL) {
                        const std::string config_file = std::string(trick_home) + "/config_Linux.mk";
                        FILE * fp = fopen(config_file.c_str() , "r");
                        if (fp != NULL) {
                            char line[256];
                            while (fgets(line, sizeof(line), fp) != NULL) {
                                if (strncmp(line, "DMTCP", 5)==0) {
                                    const char * first_slash = strchr(line, '/');
                                    if (first_slash != NULL) {
                                        dmtcp_home = first_slash;
                                        // Strip the trailing newline only if one is present.
                                        if (!dmtcp_home.empty() && dmtcp_home[dmtcp_home.size()-1] == '\n') {
                                            dmtcp_home.erase(dmtcp_home.size()-1);
                                        }
                                    }
                                    break;
                                }
                            }
                            fclose(fp); // the original never closed the config file
                        }
                    }

                    /** @li Issue a dmtcp_command to kill the dmtcp_coordinator. */
                    fprintf(stderr, "Master attempting to kill slave's dmtcp_coordinator port= %ld"
                            " (it may not exist, that's ok)\n", dmtcp_port);
                    const std::string dmtcp_command = dmtcp_home + std::string("/bin/dmtcp_command");
                    if (dmtcp_home.empty() || access(dmtcp_command.c_str(), F_OK) != 0) {
                        fprintf(stderr, "\nCould not find %s in order to kill the dmtcp_coordinator.\n",
                                dmtcp_command.c_str());
                    } else {
                        message_publish(MSG_WARNING, "Restarting DMTCP coordinator\n");
                        // Format the port before forking so the child only has to exec.
                        std::stringstream dmtcp_port_str;
                        dmtcp_port_str << dmtcp_port;
                        const std::string port_arg = dmtcp_port_str.str();
                        if((dmtcp_pid = fork()) == 0) {
                            setsid();
                            // The variadic list must end with a char* null pointer.
                            execl(dmtcp_command.c_str(), "dmtcp_command", "--quiet", "-p", port_arg.c_str(), "q", (char *)NULL);
                            _Exit(0); // only reached when execl fails
                        } else {
                            int f_status = 0;
                            if(dmtcp_pid > 0) {
                                waitpid(dmtcp_pid, &f_status, 0);
                            } else {
                                message_publish(MSG_ERROR, "Unable to send DMTCP restart command\n");
                            }
                        }
                    }
                } // end if dmtcp_port > 0

                /** @li Finally invoke the slave's dmtcp checkpoint script. */
                message_publish(MSG_WARNING, "Auto-starting slave: %s.\n", chkpnt_name);
                if ((pid = fork()) == 0) {
                    setsid();
                    // argv[0] is the last '/'-separated component of chkpnt_name.
                    std::istringstream sChkpnt(chkpnt_name);
                    std::string fileName;
                    while (std::getline(sChkpnt, fileName, '/'));
                    execl(chkpnt_name, fileName.c_str(), (char *)NULL);
                    _Exit(0); // only reached when execl fails
                }
            }
        } // end chkpnt_auto

        /** @li If our connection is a socket, disconnect the socket and call accept again */
        if (dynamic_cast<MSSocket*>(connection)) {
            connection->disconnect();
            //TODO: this will block until slave restarts, possibly causing overruns in freeze mode
            connection->accept();
        }
        reconnect_count = 0; // start writing status to slave again
    }
#endif
    return(0) ;
}
/**
* @relates Trick::Master
* C binded function to toggle the master/slave synchronization flag to on.

View File

@ -39,16 +39,6 @@ int Trick::Slave::process_sim_args() {
return(0) ;
}
/**
 * Re-establish the slave's connection.
 *
 * When the slave is enabled, marks it as reconnected, then disconnects and
 * connects the connection again. When the slave is not enabled, nothing is done.
 *
 * @return always 0
 */
int Trick::Slave::dmtcp_restart() {
    /* Nothing to do for a slave that is not enabled. */
    if ( !enabled ) {
        return(0) ;
    }

    /* Record the reconnect before cycling the connection. */
    reconnected = true ;
    connection->disconnect();
    connection->connect();

    return(0) ;
}
int Trick::Slave::init() {
std::string rts_disable_name ;
@ -132,17 +122,6 @@ std::string Trick::Slave::get_checkpoint_name(MS_SIM_COMMAND command) {
file_name_stream << "chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
}
}
#ifdef _DMTCP
// dmtcp_checkpoint() only wants a filename -- no dir path
if (command == MS_ChkpntDumpBinCmd) {
if (chkpnt_name[0] != MS_ERROR_NAME) {
file_name_stream << "dmtcp_" << std::string(strrchr(chkpnt_name, '/')+1); // dmtcp_ + filename
//std::cout << "----> Slave: parsed checkpoint file name: " << file_name_stream.str() << std::endl;
} else {
file_name_stream << ""; // dmtcp will create default name
}
}
#endif
// ascii load_checkpoint() wants the dir/filename path
if (command == MS_ChkpntLoadAsciiCmd) {
dir = command_line_args_get_output_dir(); // run dir
@ -152,22 +131,6 @@ std::string Trick::Slave::get_checkpoint_name(MS_SIM_COMMAND command) {
file_name_stream << dir << "/chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
}
}
#ifdef _DMTCP
// dmtcp load wants full dir/filename path that will be sent to the master
if (command == MS_ChkpntLoadBinCmd) {
dir = getenv("DMTCP_CHECKPOINT_DIR"); // env variable set by dmtcp
if (chkpnt_name[0] != MS_ERROR_NAME) {
file_name_stream << dir << "/dmtcp_" << std::string(strrchr(chkpnt_name, '/')+1); // dmtcp dir / filename
} else { // create default name
file_name_stream << dir << "/dmtcp_chkpnt_" << std::fixed << std::setprecision(2) << exec_get_sim_time() ;
}
if (file_name_stream.str().length() > sizeof(chkpnt_name)-1) {
message_publish(MSG_ERROR, "Slave could not send checkpoint name to master because name too long (max = %d).\n",
sizeof(chkpnt_name)) ;
file_name_stream << MS_ERROR_NAME; // send error character
}
}
#endif
return(file_name_stream.str()) ;
}
@ -178,10 +141,6 @@ int Trick::Slave::end_of_frame() {
MS_SIM_COMMAND command ;
MS_SIM_COMMAND slave_command ;
std::string chkpt_name_str;
#ifdef _DMTCP
int dmtcp_port;
std::string dmtcp_port_str;
#endif
/** @par Detailed Design */
if ( (enabled) and (activated) ){
@ -252,13 +211,6 @@ int Trick::Slave::end_of_frame() {
chkpt_name_str = get_checkpoint_name(MS_ChkpntDumpAsciiCmd);
checkpoint(chkpt_name_str.c_str());
break;
case (MS_ChkpntDumpBinCmd): // Master tells slave to dump a binary checkpoint
message_publish(MSG_WARNING , "Slave received Checkpoint Dump Binary command from master.\n") ;
#ifdef _DMTCP
chkpt_name_str = get_checkpoint_name(MS_ChkpntDumpBinCmd);
dmtcp_checkpoint(chkpt_name_str.c_str());
#endif
break;
/** @li if reading the master mode command returned a checkpoint load command, load a checkpoint */
case (MS_ChkpntLoadAsciiCmd): // Master tells slave to load an ascii checkpoint
message_publish(MSG_WARNING , "Slave received Checkpoint Load command from master.\n") ;
@ -266,27 +218,6 @@ int Trick::Slave::end_of_frame() {
load_checkpoint(chkpt_name_str.c_str()); // load done in freeze or end_of_frame job
//load_checkpoint_job(); // do the load NOW
break;
case (MS_ChkpntLoadBinCmd): // Master tells slave to load a binary checkpoint
message_publish(MSG_WARNING , "Slave received Checkpoint Load Binary command from master.\n") ;
#ifdef _DMTCP
chkpt_name_str = get_checkpoint_name(MS_ChkpntLoadBinCmd);
strcpy(chkpnt_name, chkpt_name_str.c_str());
// write the dmtcp_coordinator port to the master so it can kill the coordinator when restarting slave
dmtcp_port = 0;
dmtcp_port_str= getenv("DMTCP_PORT"); // env variable set by dmtcp
if (dmtcp_port_str.length() > 0) {
sscanf(dmtcp_port_str.c_str(), "%d", &dmtcp_port);
}
// this tells master we are sending port number and file name next
connection->write_command(MS_ChkpntLoadBinCmd) ;
connection->write_port(dmtcp_port) ;
connection->write_name(chkpnt_name, sizeof(chkpnt_name)) ;
// this is just an ack so we know master received port before we kill socket by shutting down
command = connection->read_command() ;
fprintf(stderr, "SLAVE GOT ACK %d\n", command);
exec_terminate_with_return(0, __FILE__, __LINE__, "YOU MUST NOW RUN SLAVE'S DMTCP CHECKPOINT FILE!.");
#endif
break;
default:
/** @li if reading the master mode command returned an Executive mode, set the slave mode command to the master mode command */
exec_set_exec_command((SIM_COMMAND)command) ;

View File

@ -72,7 +72,6 @@ void * Trick::MessageTCDeviceListenThread::thread_body() {
new_connection->disable_handshaking = TC_COMM_TRUE ;
new_connection->blockio_limit = 0.0 ;
new_connection->blockio_type = TC_COMM_BLOCKIO ;
new_connection->dmtcp_use_real = 1 ;
new_connection->client_id = 0 ;
strcpy(new_connection->client_tag, "") ;
new_connection->error_handler = (TrickErrorHndlr *) calloc(1, (int)sizeof(TrickErrorHndlr));

View File

@ -43,7 +43,6 @@ Trick::VariableServerThread::VariableServerThread(TCDevice * in_listen_dev) :
connection.blockio_limit = 0.0 ;
connection.blockio_type = TC_COMM_BLOCKIO ;
connection.client_id = 0 ;
connection.dmtcp_use_real = 1 ;
strcpy(connection.client_tag, "") ;
connection.error_handler = (TrickErrorHndlr *) calloc(1, (int)sizeof(TrickErrorHndlr));
connection.error_handler->report_level = TRICK_ERROR_CAUTION;

View File

@ -47,10 +47,6 @@
#ifdef HDF5
#include "trick/DRHDF5.hh"
#endif
#ifdef DMTCP
#include "trick/DMTCP.hh"
#include "trick/dmtcp_checkpoint_c_intf.hh"
#endif
#include "trick/DataRecordDispatcher.hh"
#include "trick/data_record_proto.h"
#include "trick/DebugPause.hh"

View File

@ -13,12 +13,6 @@
#include "trick/tc_proto.h"
#include "trick/trick_byteswap.h"
#if _DMTCP
#include <dlfcn.h>
#include "dmtcpaware.h"
#endif
int tc_accept_(TCDevice * listen_device, TCDevice * device, const char *file, int line)
{
socklen_t length;
@ -35,27 +29,7 @@ int tc_accept_(TCDevice * listen_device, TCDevice * device, const char *file, in
memset(&s_in, 0, sizeof(struct sockaddr_in)) ;
/* Accept On Listen Device */
length = sizeof(s_in);
#if _DMTCP
if( dmtcpIsEnabled() && device->dmtcp_use_real ) {
const char real_accept_name[] = "_real_accept" ;
void* dlhandle ;
int (*real_accept_ptr)( int, struct sockaddr *,socklen_t *) = NULL ;
dlhandle = dlopen( NULL, RTLD_LAZY) ;
real_accept_ptr = (int (*)(int, struct sockaddr *,socklen_t *))dlsym( dlhandle , real_accept_name) ;
if ( real_accept_ptr != NULL ) {
printf("calling DMTCP _real_accept %s:%d\n", file , line) ;
the_socket = (*real_accept_ptr)(listen_device->socket, (struct sockaddr *) &s_in, &length) ;
} else {
the_socket = accept(listen_device->socket, (struct sockaddr *) &s_in, &length);
}
dlclose(dlhandle) ;
} else {
the_socket = accept(listen_device->socket, (struct sockaddr *) &s_in, &length);
}
#else
the_socket = accept(listen_device->socket, (struct sockaddr *) &s_in, &length);
#endif
sprintf(client_str, "(ID = %d tag = %s)", listen_device->client_id, listen_device->client_tag);

View File

@ -16,12 +16,6 @@
#include "trick/tc_proto.h"
#include "trick/trick_byteswap.h"
#if _DMTCP
#include <dlfcn.h>
#include "dmtcpaware.h"
#endif
int tc_connect_(TCDevice * device, const char *file, int line)
{
struct sockaddr_in sockin;
@ -120,27 +114,7 @@ int tc_connect_(TCDevice * device, const char *file, int line)
/*
* Establish the connection to the selected server
*/
#if _DMTCP
if( dmtcpIsEnabled() && device->dmtcp_use_real ) {
const char real_connect_name[] = "_real_connect" ;
void* dlhandle ;
int (*real_connect_ptr)( int, struct sockaddr *,socklen_t) = NULL ;
dlhandle = dlopen( NULL, RTLD_LAZY) ;
real_connect_ptr = (int (*)(int, struct sockaddr *,socklen_t))dlsym( dlhandle , real_connect_name) ;
if ( real_connect_ptr != NULL ) {
printf("calling DMTCP _real_connect %s:%d\n", file , line) ;
ret = (*real_connect_ptr)(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin)) ;
} else {
ret = connect(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin));
}
dlclose(dlhandle) ;
} else {
ret = connect(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin));
}
#else
ret = connect(the_socket, (struct sockaddr *) &sockin, (socklen_t) sizeof(sockin));
#endif
if ( ret < 0) {
trick_error_report(device->error_handler,TRICK_ERROR_ALERT, file, line, "%s: could not connect to host: %s\n", client_str, strerror(errno));