Resolved the segmentation fault issue when loading a checkpoint with HDF5 data recording enabled.

Resolved the segmentation fault issue when loading a checkpoint with HDF5 data recording enabled.
This commit is contained in:
Hong Chen 2025-02-02 21:55:29 -06:00
parent 40a4bdb847
commit bd65746a34
2 changed files with 78 additions and 42 deletions

View File

@ -37,16 +37,6 @@ PROGRAMMERS:
namespace Trick {
#ifdef HDF5
#ifndef TRICK_ICG
/**
 * Per-parameter bookkeeping record used by the DRHDF5 recorder.
 * One instance is allocated for each recorded variable and pairs the
 * HDF5 packet table created for that variable with its record buffer.
 */
struct HDF5_INFO {
/* Identifier returned by H5PTcreate_fl() for this parameter's packet table;
 * it is passed to H5PTappend() when writing and H5PTclose() at shutdown. */
hid_t dataset;
/* The rec_buffer entry for this parameter; its buffer and ref->attr->size
 * are read to locate the values appended to the packet table. */
Trick::DataRecordBuffer * drb ;
};
#endif
#endif
/**
The DRHDF5 recording format is an industry conforming HDF5 formatted file. Files written in this format are named
log_<group_name>.h5. The contents of this file type are readable by the Trick Data Products packages from
@ -56,6 +46,9 @@ namespace Trick {
@verbatim
GROUP "/" {
GROUP "header" {
DATASET "byte_order" {
"little_endian"
}
DATASET "file_names" {
"param_1_file_name", "param_2_file_name", etc...
}
@ -133,10 +126,29 @@ GROUP "/" {
protected:
#ifdef HDF5
std::vector<HDF5_INFO *> parameters; // trick_io(**)
/**
The HDF5 file handle.
*/
hid_t file; // trick_io(**)
/**
Root group and header group in the HDF5 file.
*/
hid_t root_group, header_group; // trick_io(**)
/**
Parameter names array to be used in the HDF5 packet table.
Each array item is a string of the parameter name that is
the copy of the reference name.
This is needed so when the dataset is closed, the reference
name in rec_buffer is still valid and won't cause double
deleting when variables are removed from rec_buffer.
*/
char** param_names; // trick_io(**)
/**
The dataset ids for each parameter.
*/
hid_t* param_dataset_ids; // trick_io(**)
#endif
} ;

View File

@ -38,7 +38,6 @@ int Trick::DRHDF5::format_specific_init() {
#ifdef HDF5
unsigned int ii ;
HDF5_INFO *hdf5_info ;
hsize_t chunk_size = 1024;
hid_t byte_id ;
hid_t file_names_id, param_types_id, param_units_id, param_names_id ;
@ -58,11 +57,21 @@ int Trick::DRHDF5::format_specific_init() {
return -1 ;
}
// Check file validity first
if (H5Iis_valid(file) <= 0) {
message_publish(MSG_ERROR, "File handle invalid, id=%lld\n", (long long)file);
return -1;
}
// All HDF5 objects live in the top-level "/" (root) group.
root_group = H5Gopen(file, "/", H5P_DEFAULT);
// Create a new group named "header" at the root ("/") level.
header_group = H5Gcreate(file, "/header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
// Validate header group
if (H5Iis_valid(header_group) <= 0) {
message_publish(MSG_ERROR, "Header group invalid, id=%lld\n", (long long)header_group);
return -1;
}
// Create a packet table (PT) that stores byte order.
byte_id = H5PTcreate_fl(header_group, "byte_order", s256, chunk_size, 1) ;
// Add the byte order value to the byte packet table.
@ -76,11 +85,14 @@ int Trick::DRHDF5::format_specific_init() {
// Create a packet table (PT) that stores each parameter's name.
param_names_id = H5PTcreate_fl(header_group, "param_names", s256, chunk_size, 1) ;
// Allocate memory for the parameter names
param_names = new char*[rec_buffer.size()];
// Allocate memory for the dataset ids
param_dataset_ids = new hid_t[rec_buffer.size()];
// Create a table for each requested parameter.
for (ii = 0; ii < rec_buffer.size(); ii++) {
hdf5_info = (HDF5_INFO *)malloc(sizeof(HDF5_INFO));
/* Case statements taken from "parameter_types.h."
* HDF5 Native types found in "H5Tpublic.h." */
switch (rec_buffer[ii]->ref->attr->type) {
@ -150,7 +162,6 @@ int Trick::DRHDF5::format_specific_init() {
#endif
break;
default:
free(hdf5_info);
continue;
}
@ -170,18 +181,19 @@ int Trick::DRHDF5::format_specific_init() {
* RETURN:
* Returns an identifier for the new packet table, or H5I_BADID on error.
*/
hdf5_info->dataset = H5PTcreate_fl(root_group, rec_buffer[ii]->ref->reference, datatype, chunk_size, 1) ;
if ( hdf5_info->dataset == H5I_BADID ) {
// Allocate memory for this parameter's name (reference string plus terminating NUL)
param_names[ii] = (char *)malloc(strlen(rec_buffer[ii]->ref->reference) + 1);
// Copy the parameter name to the list
strcpy(param_names[ii], rec_buffer[ii]->ref->reference);
// Create a packet table for each parameter
param_dataset_ids[ii] = H5PTcreate_fl(root_group, param_names[ii], datatype, chunk_size, 1) ;
// Validate the dataset
if ( param_dataset_ids[ii] == H5I_BADID ) {
message_publish(MSG_ERROR, "An error occured in data record group \"%s\" when adding \"%s\".\n",
group_name.c_str() , rec_buffer[ii]->ref->reference) ;
group_name.c_str() , param_names[ii]) ;
continue;
}
hdf5_info->drb = rec_buffer[ii] ;
/* Add the new parameter element to the end of the vector.
* This effectively increases the vector size by one. */
parameters.push_back(hdf5_info);
// As a bonus, add a header entry for each parameter.
/* File Name */
buf = "log_" + group_name ;
@ -241,27 +253,25 @@ int Trick::DRHDF5::write_data(bool must_write) {
// Test if the writer pointer is to the right of the buffer pointer in the ring
if ( (writer_num % max_num) > (local_buffer_num % max_num) ) {
// we have 2 segments to write per variable
for (ii = 0; ii < parameters.size(); ii++) {
HDF5_INFO * hi = parameters[ii] ;
for (ii = 0; ii < rec_buffer.size(); ii++) {
unsigned int writer_offset = writer_num % max_num ;
buf = hi->drb->buffer + (writer_offset * hi->drb->ref->attr->size) ;
buf = rec_buffer[ii]->buffer + (writer_offset * rec_buffer[ii]->ref->attr->size) ;
/* Append all of the data on the end of the buffer to the packet table. */
H5PTappend( hi->dataset, max_num - writer_offset , buf );
H5PTappend( param_dataset_ids[ii], max_num - writer_offset , buf );
buf = hi->drb->buffer ;
buf = rec_buffer[ii]->buffer ;
/* Append all of the data at the beginning of the buffer to the packet table. */
H5PTappend( hi->dataset, local_buffer_num % max_num , buf );
H5PTappend( param_dataset_ids[ii], local_buffer_num % max_num , buf );
}
} else {
// we have 1 continuous segment to write per variable
for (ii = 0; ii < parameters.size(); ii++) {
HDF5_INFO * hi = parameters[ii] ;
for (ii = 0; ii < rec_buffer.size(); ii++) {
unsigned int writer_offset = writer_num % max_num ;
buf = hi->drb->buffer + (writer_offset * hi->drb->ref->attr->size) ;
buf = rec_buffer[ii]->buffer + (writer_offset * rec_buffer[ii]->ref->attr->size) ;
/* Append all of the data to the packet table. */
H5PTappend( hi->dataset, local_buffer_num - writer_num , buf );
H5PTappend( param_dataset_ids[ii], local_buffer_num - writer_num , buf );
}
}
@ -290,16 +300,15 @@ int Trick::DRHDF5::format_specific_write_data(unsigned int writer_offset __attri
char *buf = 0;
/* Loop through each parameter. */
for (ii = 0; ii < parameters.size(); ii++) {
for (ii = 0; ii < rec_buffer.size(); ii++) {
/* Each parameters[] element contains a DataRecordBuffer class.
* So there is a separate DataRecordBuffer per variable.
* Point to the value to be recorded. */
HDF5_INFO * hi = parameters[ii] ;
buf = hi->drb->buffer + (writer_offset * hi->drb->ref->attr->size) ;
buf = rec_buffer[ii]->buffer + (writer_offset * rec_buffer[ii]->ref->attr->size) ;
/* Append 1 value to the packet table. */
H5PTappend( hi->dataset, 1, buf );
H5PTappend( param_dataset_ids[ii], 1, buf );
}
#endif
@ -320,11 +329,26 @@ int Trick::DRHDF5::format_specific_shutdown() {
unsigned int ii ;
if ( inited ) {
for (ii = 0; ii < parameters.size(); ii++) {
HDF5_INFO * hi = parameters[ii] ;
H5PTclose( hi->dataset );
for (ii = 0; ii < rec_buffer.size(); ii++) {
// Free parameter names memory
free(param_names[ii]);
// Close the parameter dataset
if (param_dataset_ids[ii] != H5I_BADID) {
H5PTclose(param_dataset_ids[ii]);
}
}
// Free the parameter names array
delete[] param_names;
// Set the pointer to NULL
param_names = nullptr;
// Free the dataset ids array
delete[] param_dataset_ids;
// Set the pointer to NULL
param_dataset_ids = nullptr;
// Close root group
H5Gclose(root_group);
// Close file handle
H5Fclose(file);
}