Resolved the segmentation fault issue when loading a checkpoint with HDF5 data recording enabled. (#1837)

* Resolved the segmentation fault issue when loading a checkpoint with HDF5 data recording enabled.

Resolved the segmentation fault issue when loading a checkpoint with HDF5 data recording enabled.

* Fixed bitfield for hdf5

Fixed bitfield for hdf5 recording.
This commit is contained in:
Hong Chen 2025-03-11 11:00:12 -05:00 committed by GitHub
parent d98eef4b66
commit fd6df3cf33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 296 additions and 62 deletions

View File

@ -37,16 +37,6 @@ PROGRAMMERS:
namespace Trick {
#ifdef HDF5
#ifndef TRICK_ICG
struct HDF5_INFO {
hid_t dataset;
Trick::DataRecordBuffer * drb ;
};
#endif
#endif
/**
The DRHDF5 recording format is an industry conforming HDF5 formatted file. Files written in this format are named
log_<group_name>.h5. The contents of this file type are readable by the Trick Data Products packages from
@ -56,6 +46,9 @@ namespace Trick {
@verbatim
GROUP "/" {
GROUP "header" {
DATASET "byte_order" {
"little_endian"
}
DATASET "file_names" {
"param_1_file_name", "param_2_file_name", etc...
}
@ -133,10 +126,29 @@ GROUP "/" {
protected:
#ifdef HDF5
std::vector<HDF5_INFO *> parameters; // trick_io(**)
/**
The HDF5 file handle.
*/
hid_t file; // trick_io(**)
/**
Root group and header group in the HDF5 file.
*/
hid_t root_group, header_group; // trick_io(**)
/**
Parameter names array to be used in the HDF5 packet table.
Each array item is a string holding a copy of the parameter's
reference name.
Keeping a separate copy means that, when the dataset is closed,
the reference name in rec_buffer is still valid, and it avoids a
double delete when variables are removed from rec_buffer.
*/
char** param_names; // trick_io(**)
/**
The dataset ids for each parameter.
*/
hid_t* param_dataset_ids; // trick_io(**)
#endif
} ;

View File

@ -0,0 +1,43 @@
# Data recording setup for the "DR_bitfieldsHDF5" group, recorded through
# trick.DRHDF5.  Registers the four bitfield members of each bitfield
# structure under drx.drt.
global DR_GROUP_ID
global drg

# DR_GROUP_ID is the index of this file's group inside drg.  When the name is
# already defined (and non-negative) move on to the next slot; when it is not
# defined yet, start the counter and the group list from scratch.
try:
    if DR_GROUP_ID >= 0:
        DR_GROUP_ID += 1
except NameError:
    DR_GROUP_ID = 0
    drg = []

drg.append(trick.DRHDF5("DR_bitfieldsHDF5"))
drg[DR_GROUP_ID].set_freq(trick.DR_Always)
drg[DR_GROUP_ID].set_cycle(0.1)
drg[DR_GROUP_ID].set_single_prec_only(False)

# Variables are added in exactly this order.
for _dr_var_name in (
    "drx.drt.charB.var1",
    "drx.drt.charB.var2",
    "drx.drt.charB.var3",
    "drx.drt.charB.var4",
    "drx.drt.intB.var1",
    "drx.drt.intB.var2",
    "drx.drt.intB.var3",
    "drx.drt.intB.var4",
    "drx.drt.shortB.var1",
    "drx.drt.shortB.var2",
    "drx.drt.shortB.var3",
    "drx.drt.shortB.var4",
    "drx.drt.ucharB.var1",
    "drx.drt.ucharB.var2",
    "drx.drt.ucharB.var3",
    "drx.drt.ucharB.var4",
    "drx.drt.uintB.var1",
    "drx.drt.uintB.var2",
    "drx.drt.uintB.var3",
    "drx.drt.uintB.var4",
    "drx.drt.ushortB.var1",
    "drx.drt.ushortB.var2",
    "drx.drt.ushortB.var3",
    "drx.drt.ushortB.var4",
    "drx.drt.mixB.var1",
    "drx.drt.mixB.var2",
    "drx.drt.mixB.var3",
    "drx.drt.mixB.var4",
):
    drg[DR_GROUP_ID].add_variable(_dr_var_name)
# Do not leave the loop variable behind in the shared namespace.
del _dr_var_name

trick.add_data_record_group(drg[DR_GROUP_ID], trick.DR_Buffer)
drg[DR_GROUP_ID].enable()

View File

@ -0,0 +1,38 @@
# Data recording setup for the "DR_typesHDF5" group, recorded through
# trick.DRHDF5.  Registers the members a..p of drx.drt, the first five
# elements of q, and r[0][0].
global DR_GROUP_ID
global drg

# DR_GROUP_ID is the index of this file's group inside drg.  When the name is
# already defined (and non-negative) move on to the next slot; when it is not
# defined yet, start the counter and the group list from scratch.
try:
    if DR_GROUP_ID >= 0:
        DR_GROUP_ID += 1
except NameError:
    DR_GROUP_ID = 0
    drg = []

drg.append(trick.DRHDF5("DR_typesHDF5"))
drg[DR_GROUP_ID].set_freq(trick.DR_Always)
drg[DR_GROUP_ID].set_cycle(0.1)
drg[DR_GROUP_ID].set_single_prec_only(False)

# Variables are added in exactly this order.
for _dr_var_name in (
    "drx.drt.a",
    "drx.drt.b",
    "drx.drt.c",
    "drx.drt.d",
    "drx.drt.e",
    "drx.drt.f",
    "drx.drt.g",
    "drx.drt.h",
    "drx.drt.i",
    "drx.drt.j",
    "drx.drt.k",
    "drx.drt.l",
    "drx.drt.m",
    "drx.drt.n",
    "drx.drt.o",
    "drx.drt.p",
    "drx.drt.q[0]",
    "drx.drt.q[1]",
    "drx.drt.q[2]",
    "drx.drt.q[3]",
    "drx.drt.q[4]",
    "drx.drt.r[0][0]",
):
    drg[DR_GROUP_ID].add_variable(_dr_var_name)
# Do not leave the loop variable behind in the shared namespace.
del _dr_var_name

trick.add_data_record_group(drg[DR_GROUP_ID], trick.DR_Buffer)
drg[DR_GROUP_ID].enable()

Binary file not shown.

View File

@ -5,6 +5,10 @@ trick_utest.unit_tests.set_file_name( os.getenv("TRICK_HOME") + "/trick_test/SIM
trick_utest.unit_tests.set_test_name( "DRTest" )
has_dhf5 = False
if hasattr(trick, 'DRHDF5'):
has_dhf5 = True
######################################################################################################################
test_suite = "drg api"
@ -16,10 +20,18 @@ num_drgs = trick.get_num_data_record_groups()
TRICK_EXPECT_EQ( num_drgs , 0 , test_suite , "0 drgs before any created" )
# The first item of each pair is the .dr file name and the second item of each pair is the drg name
dr_file_name_drg_name_tuple = (('Modified_data/dr_typesASCII.dr', 'DR_typesASCII'),
('Modified_data/dr_typesBINARY.dr', 'DR_typesBINARY'),
('Modified_data/dr_bitfASCII.dr', 'DR_bitfieldsASCII'),
('Modified_data/dr_bitfBINARY.dr', 'DR_bitfieldsBINARY'))
if has_dhf5:
dr_file_name_drg_name_tuple = (('Modified_data/dr_typesASCII.dr', 'DR_typesASCII'),
('Modified_data/dr_typesBINARY.dr', 'DR_typesBINARY'),
('Modified_data/dr_typesHDF5.dr', 'DR_typesHDF5'),
('Modified_data/dr_bitfASCII.dr', 'DR_bitfieldsASCII'),
('Modified_data/dr_bitfBINARY.dr', 'DR_bitfieldsBINARY'),
('Modified_data/dr_bitfHDF5.dr', 'DR_bitfieldsHDF5'))
else:
dr_file_name_drg_name_tuple = (('Modified_data/dr_typesASCII.dr', 'DR_typesASCII'),
('Modified_data/dr_typesBINARY.dr', 'DR_typesBINARY'),
('Modified_data/dr_bitfASCII.dr', 'DR_bitfieldsASCII'),
('Modified_data/dr_bitfBINARY.dr', 'DR_bitfieldsBINARY'))
num_files = len(dr_file_name_drg_name_tuple)
for i in range(num_files):
@ -29,7 +41,10 @@ for i in range(num_files):
num_drgs = trick.get_num_data_record_groups()
# Check the result of trick.get_num_data_record_groups()
TRICK_EXPECT_EQ( num_drgs , 4 , test_suite , "num of dr groups = 4" )
if has_dhf5:
TRICK_EXPECT_EQ( num_drgs , 6 , test_suite , "num of dr groups = 6" )
else:
TRICK_EXPECT_EQ( num_drgs , 4 , test_suite , "num of dr groups = 4" )
# Test trick.get_data_record_group(<drg_name>) for getting the drg pointer by its name
# Check the name of the obtained drg instead of the drg pointer
@ -49,7 +64,10 @@ TRICK_EXPECT_TRUE( is_null, test_suite , "null drg by nonexistent drg name" )
is_null = False
if trick.get_data_record_group_by_idx(num_drgs+1) is None :
is_null = True
TRICK_EXPECT_TRUE( is_null, test_suite , "null drg by drg id 5" )
if has_dhf5:
TRICK_EXPECT_TRUE( is_null, test_suite , "null drg by drg id 7" )
else:
TRICK_EXPECT_TRUE( is_null, test_suite , "null drg by drg id 5" )
is_null = False
if trick.get_data_record_group_by_idx(-1) is None :

View File

@ -13,6 +13,7 @@ PROGRAMMERS:
#include "trick/command_line_protos.h"
#include "trick/memorymanager_c_intf.h"
#include "trick/message_proto.h"
#include "trick/bitfield_proto.h"
Trick::DRHDF5::DRHDF5( std::string in_name, Trick::DR_Type dr_type ) : Trick::DataRecordGroup(in_name, dr_type) {
register_group_with_mm(this, "Trick::DRHDF5") ;
@ -38,7 +39,6 @@ int Trick::DRHDF5::format_specific_init() {
#ifdef HDF5
unsigned int ii ;
HDF5_INFO *hdf5_info ;
hsize_t chunk_size = 1024;
hid_t byte_id ;
hid_t file_names_id, param_types_id, param_units_id, param_names_id ;
@ -58,11 +58,21 @@ int Trick::DRHDF5::format_specific_init() {
return -1 ;
}
// Check file validity first
if (H5Iis_valid(file) <= 0) {
message_publish(MSG_ERROR, "File handle invalid, id=%lld\n", (long long)file);
return -1;
}
// All HDF5 objects live in the top-level "/" (root) group.
root_group = H5Gopen(file, "/", H5P_DEFAULT);
// Create a new group named "header" at the root ("/") level.
header_group = H5Gcreate(file, "/header", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
// Validate header group
if (H5Iis_valid(header_group) <= 0) {
message_publish(MSG_ERROR, "Header group invalid, id=%lld\n", (long long)header_group);
return -1;
}
// Create a packet table (PT) that stores byte order.
byte_id = H5PTcreate_fl(header_group, "byte_order", s256, chunk_size, 1) ;
// Add the byte order value to the byte packet table.
@ -76,11 +86,14 @@ int Trick::DRHDF5::format_specific_init() {
// Create a packet table (PT) that stores each parameter's name.
param_names_id = H5PTcreate_fl(header_group, "param_names", s256, chunk_size, 1) ;
// Allocate memory for the parameter names
param_names = new char*[rec_buffer.size()];
// Allocate memory for the dataset ids
param_dataset_ids = new hid_t[rec_buffer.size()];
// Create a table for each requested parameter.
for (ii = 0; ii < rec_buffer.size(); ii++) {
hdf5_info = (HDF5_INFO *)malloc(sizeof(HDF5_INFO));
/* Case statements taken from "parameter_types.h."
* HDF5 Native types found in "H5Tpublic.h." */
switch (rec_buffer[ii]->ref->attr->type) {
@ -128,9 +141,9 @@ int Trick::DRHDF5::format_specific_init() {
}
break;
case TRICK_UNSIGNED_BITFIELD:
if (rec_buffer[ii]->ref->attr->size == sizeof(int)) {
if (rec_buffer[ii]->ref->attr->size == sizeof(unsigned int)) {
datatype = H5T_NATIVE_UINT;
} else if (rec_buffer[ii]->ref->attr->size == sizeof(short)) {
} else if (rec_buffer[ii]->ref->attr->size == sizeof(unsigned short)) {
datatype = H5T_NATIVE_USHORT;
} else {
datatype = H5T_NATIVE_UCHAR;
@ -150,7 +163,6 @@ int Trick::DRHDF5::format_specific_init() {
#endif
break;
default:
free(hdf5_info);
continue;
}
@ -170,18 +182,19 @@ int Trick::DRHDF5::format_specific_init() {
* RETURN:
* Returns an identifier for the new packet table, or H5I_BADID on error.
*/
hdf5_info->dataset = H5PTcreate_fl(root_group, rec_buffer[ii]->ref->reference, datatype, chunk_size, 1) ;
if ( hdf5_info->dataset == H5I_BADID ) {
// Allocate memory for the parameter names
param_names[ii] = (char *)malloc(strlen(rec_buffer[ii]->ref->reference) + 1);
// Copy the parameter name to the list
strcpy(param_names[ii], rec_buffer[ii]->ref->reference);
// Create a packet table for each parameter
param_dataset_ids[ii] = H5PTcreate_fl(root_group, param_names[ii], datatype, chunk_size, 1) ;
// Validate the dataset
if ( param_dataset_ids[ii] == H5I_BADID ) {
message_publish(MSG_ERROR, "An error occured in data record group \"%s\" when adding \"%s\".\n",
group_name.c_str() , rec_buffer[ii]->ref->reference) ;
group_name.c_str() , param_names[ii]) ;
continue;
}
hdf5_info->drb = rec_buffer[ii] ;
/* Add the new parameter element to the end of the vector.
* This effectively increases the vector size by one. */
parameters.push_back(hdf5_info);
// As a bonus, add a header entry for each parameter.
/* File Name */
buf = "log_" + group_name ;
@ -210,6 +223,105 @@ int Trick::DRHDF5::format_specific_init() {
return(0);
}
#ifdef HDF5
/**
 * Helper function to append the specified data records for one variable to its
 * dataset (packet table).
 *
 * Non-bitfield types are appended directly from buf.  Bitfield types are first
 * extracted, record by record, into a temporary array that is appended and then
 * released before the function returns.
 *
 * @param drb      Recording buffer entry of the variable; drb->ref->attr supplies the
 *                 type, the size of one record, and (for bitfields) the start bit and
 *                 the number of bits.
 * @param buf      Address of the first record to append.
 * @param records  Number of consecutive records, starting at buf, to append.
 * @param param_ds Identifier of the packet table that receives the records.
 */
void append_var_packet_table(Trick::DataRecordBuffer *drb, char* buf, size_t records, hid_t param_ds) {
    // Temporary array of extracted bitfield values; stays null for all other types.
    void* data = 0;
    // Only used through sizeof(bf): the temporary array is sized with int-wide elements,
    // which is at least as large as the int/short/char elements stored into it below.
    // NOTE(review): the GET_BITFIELD / GET_UNSIGNED_BITFIELD results are otherwise unused;
    // if the macros have no side effects these assignments could be replaced by sizeof(int).
    int bf;

    switch (drb->ref->attr->type) {
        case TRICK_CHARACTER:
        case TRICK_UNSIGNED_CHARACTER:
        case TRICK_STRING:
        case TRICK_SHORT:
        case TRICK_UNSIGNED_SHORT:
        case TRICK_ENUMERATED:
        case TRICK_INTEGER:
        case TRICK_UNSIGNED_INTEGER:
        case TRICK_LONG:
        case TRICK_UNSIGNED_LONG:
        case TRICK_FLOAT:
        case TRICK_DOUBLE:
            H5PTappend(param_ds, records , buf);
            break;

        case TRICK_BITFIELD:
            bf = GET_BITFIELD(buf, drb->ref->attr->size, drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
            data = malloc(records * sizeof(bf));

            // Extract the bitfield of each record from its own segment of buf.
            for (size_t j = 0; j < records; j++) {
                // Each record in buf occupies drb->ref->attr->size bytes.
                size_t offset = j * drb->ref->attr->size;

                if (drb->ref->attr->size == sizeof(int)) {
                    ((int *)data)[j] = extract_bitfield_any(
                        *(int *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                } else if (drb->ref->attr->size == sizeof(short)) {
                    ((short *)data)[j] = extract_bitfield_any(
                        *(short *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                } else if (drb->ref->attr->size == sizeof(char)) {
                    ((char *)data)[j] = extract_bitfield_any(
                        *(char *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                } else {
                    ((int*)data)[j] = extract_bitfield_any(
                        *(int *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                }
            }
            H5PTappend(param_ds, records, data);
            break;

        case TRICK_UNSIGNED_BITFIELD:
            bf = GET_UNSIGNED_BITFIELD(buf, drb->ref->attr->size, drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
            data = malloc(records * sizeof(bf));

            // Extract the bitfield of each record from its own segment of buf.
            for (size_t j = 0; j < records; j++) {
                // Each record in buf occupies drb->ref->attr->size bytes.
                size_t offset = j * drb->ref->attr->size;

                if (drb->ref->attr->size == sizeof(int)) {
                    ((unsigned int *)data)[j] = extract_unsigned_bitfield_any(
                        *(unsigned int *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                } else if (drb->ref->attr->size == sizeof(short)) {
                    ((unsigned short *)data)[j] = extract_unsigned_bitfield_any(
                        *(unsigned short *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                } else if (drb->ref->attr->size == sizeof(char)) {
                    ((unsigned char *)data)[j] = extract_unsigned_bitfield_any(
                        *(unsigned char *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                } else {
                    ((int *)data)[j] = extract_unsigned_bitfield_any(
                        *(int *)(buf+offset), drb->ref->attr->size,
                        drb->ref->attr->index[0].start, drb->ref->attr->index[0].size);
                }
            }
            H5PTappend(param_ds, records, data);
            break;

        case TRICK_LONG_LONG:
        case TRICK_UNSIGNED_LONG_LONG:
        case TRICK_BOOLEAN:
        default:
            H5PTappend(param_ds, records , buf);
            break;
    }

    // Release the temporary bitfield array.  This must happen after the switch:
    // placed behind the default branch's break it was unreachable and the array
    // leaked on every bitfield append.
    if (data != 0) {
        free(data);
        data = 0;
    }
}
#endif
/*
HDF5 logging is done on a per variable basis instead of per time step like the
other recording methods. This write_data routine overrides the default in
@ -223,6 +335,8 @@ int Trick::DRHDF5::write_data(bool must_write) {
unsigned int num_to_write ;
unsigned int ii;
char *buf = 0;
size_t ds_records1;
size_t ds_records2;
if ( record and inited and (buffer_type == DR_No_Buffer or must_write)) {
@ -238,31 +352,28 @@ int Trick::DRHDF5::write_data(bool must_write) {
writer_num = local_buffer_num - num_to_write ;
if ( writer_num != local_buffer_num ) {
unsigned int writer_offset = writer_num % max_num ;
// Test if the writer pointer to the right of the buffer pointer in the ring
if ( (writer_num % max_num) > (local_buffer_num % max_num) ) {
// we have 2 segments to write per variable
for (ii = 0; ii < parameters.size(); ii++) {
HDF5_INFO * hi = parameters[ii] ;
unsigned int writer_offset = writer_num % max_num ;
buf = hi->drb->buffer + (writer_offset * hi->drb->ref->attr->size) ;
ds_records1 = max_num - writer_offset;
ds_records2 = local_buffer_num % max_num;
/* Append all of the data on the end of the buffer to the packet table. */
H5PTappend( hi->dataset, max_num - writer_offset , buf );
// we have 2 segments to write per variable
for (ii = 0; ii < rec_buffer.size(); ii++) {
//unsigned int writer_offset = writer_num % max_num ;
buf = rec_buffer[ii]->buffer + (writer_offset * rec_buffer[ii]->ref->attr->size) ;
append_var_packet_table(rec_buffer[ii], buf, ds_records1, param_dataset_ids[ii]);
buf = hi->drb->buffer ;
/* Append all of the data at the beginning of the buffer to the packet table. */
H5PTappend( hi->dataset, local_buffer_num % max_num , buf );
}
buf = rec_buffer[ii]->buffer ;
append_var_packet_table(rec_buffer[ii], buf, ds_records2, param_dataset_ids[ii]);
}
} else {
// we have 1 continuous segment to write per variable
for (ii = 0; ii < parameters.size(); ii++) {
HDF5_INFO * hi = parameters[ii] ;
unsigned int writer_offset = writer_num % max_num ;
buf = hi->drb->buffer + (writer_offset * hi->drb->ref->attr->size) ;
/* Append all of the data to the packet table. */
H5PTappend( hi->dataset, local_buffer_num - writer_num , buf );
ds_records1 = local_buffer_num - writer_num;
// we have 1 continuous segment to write per variable
for (ii = 0; ii < rec_buffer.size(); ii++) {
//unsigned int writer_offset = writer_num % max_num ;
buf = rec_buffer[ii]->buffer + (writer_offset * rec_buffer[ii]->ref->attr->size) ;
append_var_packet_table(rec_buffer[ii], buf, ds_records1, param_dataset_ids[ii]);
}
}
writer_num = local_buffer_num ;
@ -290,16 +401,13 @@ int Trick::DRHDF5::format_specific_write_data(unsigned int writer_offset __attri
char *buf = 0;
/* Loop through each parameter. */
for (ii = 0; ii < parameters.size(); ii++) {
for (ii = 0; ii < rec_buffer.size(); ii++) {
/* Each rec_buffer[] element is a DataRecordBuffer,
* so there is a separate DataRecordBuffer per variable.
* Point to the value to be recorded. */
HDF5_INFO * hi = parameters[ii] ;
buf = hi->drb->buffer + (writer_offset * hi->drb->ref->attr->size) ;
/* Append 1 value to the packet table. */
H5PTappend( hi->dataset, 1, buf );
buf = rec_buffer[ii]->buffer + (writer_offset * rec_buffer[ii]->ref->attr->size) ;
append_var_packet_table(rec_buffer[ii], buf, 1, param_dataset_ids[ii]);
}
#endif
@ -320,11 +428,26 @@ int Trick::DRHDF5::format_specific_shutdown() {
unsigned int ii ;
if ( inited ) {
for (ii = 0; ii < parameters.size(); ii++) {
HDF5_INFO * hi = parameters[ii] ;
H5PTclose( hi->dataset );
for (ii = 0; ii < rec_buffer.size(); ii++) {
// Free parameter names memory
free(param_names[ii]);
// Close the parameter dataset
if (param_dataset_ids[ii] != H5I_BADID) {
H5PTclose(param_dataset_ids[ii]);
}
}
// Free the parameter names array
delete[] param_names;
// Set the pointer to NULL
param_names = nullptr;
// Free the dataset ids array
delete[] param_dataset_ids;
// Set the pointer to NULL
param_dataset_ids = nullptr;
// Close root group
H5Gclose(root_group);
// Close file handle
H5Fclose(file);
}