From c88027e93bbef35e356f8ef408d08805d7527aac Mon Sep 17 00:00:00 2001 From: peter Date: Fri, 13 Mar 2026 09:01:41 -0700 Subject: [PATCH] memory alignment in tsk_json_struct_metadata_get_blob --- c/tests/test_core.c | 44 ++++++++++++++++++++++++++++++++++---------- c/tskit/core.c | 38 ++++++++++++++++++++++++++++++++------ c/tskit/core.h | 19 +++++++++++++++---- 3 files changed, 81 insertions(+), 20 deletions(-) diff --git a/c/tests/test_core.c b/c/tests/test_core.c index 9d8d9f5a7d..23354db1db 100644 --- a/c/tests/test_core.c +++ b/c/tests/test_core.c @@ -109,6 +109,7 @@ test_json_struct_metadata_get_blob(void) tsk_size_t metadata_length; size_t header_length; size_t json_length; + size_t padding_length; size_t payload_length; size_t total_length; char json_payload[] = "{\"a\":1}"; @@ -118,8 +119,9 @@ test_json_struct_metadata_get_blob(void) bytes = (uint8_t *) metadata; header_length = 4 + 1 + 8 + 8; json_length = strlen(json_payload); + padding_length = (8 - ((header_length + json_length) & 0x07)) % 8; payload_length = sizeof(binary_payload); - total_length = header_length + json_length + payload_length; + total_length = header_length + json_length + padding_length + payload_length; CU_ASSERT_FATAL(total_length <= sizeof(metadata)); memset(metadata, 0, sizeof(metadata)); bytes[0] = 'J'; @@ -130,40 +132,49 @@ test_json_struct_metadata_get_blob(void) set_u64_le(bytes + 5, (uint64_t) json_length); set_u64_le(bytes + 13, (uint64_t) payload_length); memcpy(bytes + header_length, json_payload, json_length); - memcpy(bytes + header_length + json_length, binary_payload, payload_length); + memset(bytes + header_length + json_length, 0, padding_length); + memcpy(bytes + header_length + json_length + padding_length, binary_payload, + payload_length); metadata_length = (tsk_size_t) total_length; ret = tsk_json_struct_metadata_get_blob( metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); CU_ASSERT_EQUAL(ret, 0); CU_ASSERT_PTR_EQUAL(json, (char *) bytes + header_length); + CU_ASSERT_EQUAL(json + json_buffer_length + padding_length, blob); CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length); if (json_length > 0) { CU_ASSERT_EQUAL(memcmp(json, json_payload, json_length), 0); } - CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length); + CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length + padding_length); CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length); CU_ASSERT_EQUAL(memcmp(blob, binary_payload, payload_length), 0); + CU_ASSERT((tsk_size_t) (blob - json) < json_buffer_length + 8); payload_length = 0; - total_length = header_length + json_length + payload_length; + total_length = header_length + json_length + padding_length + payload_length; CU_ASSERT_FATAL(total_length <= sizeof(metadata)); set_u64_le(bytes + 13, (uint64_t) payload_length); metadata_length = (tsk_size_t) total_length; ret = tsk_json_struct_metadata_get_blob( metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); CU_ASSERT_EQUAL(ret, 0); + CU_ASSERT_EQUAL(json + json_buffer_length + padding_length, blob); CU_ASSERT_PTR_EQUAL(json, (char *) bytes + header_length); CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length); CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length); - CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length); + CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length + padding_length); + CU_ASSERT((tsk_size_t) (blob - json) < json_buffer_length + 8); json_length = 0; payload_length = sizeof(empty_payload); - total_length = header_length + json_length + payload_length; + padding_length = (8 - ((header_length + json_length) & 0x07)) % 8; + total_length = header_length + json_length + padding_length + payload_length; CU_ASSERT_FATAL(total_length <= sizeof(metadata)); set_u64_le(bytes + 5, (uint64_t) json_length); set_u64_le(bytes + 13, (uint64_t) payload_length); - memcpy(bytes + header_length + json_length, empty_payload, payload_length); + memset(bytes + header_length + json_length, 0, padding_length); + memcpy(bytes + header_length + json_length + padding_length, empty_payload, + payload_length); metadata_length = (tsk_size_t) total_length; ret = tsk_json_struct_metadata_get_blob( metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); @@ -171,13 +182,14 @@ test_json_struct_metadata_get_blob(void) CU_ASSERT_PTR_EQUAL(json, (char *) bytes + header_length); CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length); CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length); - CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length); + CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length + padding_length); CU_ASSERT_EQUAL(memcmp(blob, empty_payload, payload_length), 0); + CU_ASSERT((tsk_size_t) (blob - json) < json_buffer_length + 8); blob = NULL; blob_length = 0; json = NULL; - json_buffer_length = 0; + json_length = 0; metadata_length = header_length - 1; ret = tsk_json_struct_metadata_get_blob( metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); @@ -196,7 +208,19 @@ test_json_struct_metadata_get_blob(void) CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION); bytes[4] = 1; - metadata_length = (tsk_size_t) (total_length - 1); + set_u64_le(bytes + 5, (uint64_t) json_length + 9); + ret = tsk_json_struct_metadata_get_blob( + metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); + CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_UNEXPECTED_SIZE); + set_u64_le(bytes + 5, (uint64_t) json_length); + + bytes[header_length + 1] = 1; + ret = tsk_json_struct_metadata_get_blob( + metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); + CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_NONZERO_PADDING); + bytes[header_length + 1] = 0; + + metadata_length = (tsk_size_t) (header_length - 1); ret = tsk_json_struct_metadata_get_blob( metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length); CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED); diff --git a/c/tskit/core.c b/c/tskit/core.c index 66f8cbd0ef..18d6affda3 100644 --- a/c/tskit/core.c +++ b/c/tskit/core.c @@ -150,10 +150,13 @@ tsk_json_struct_metadata_get_blob(char *metadata, tsk_size_t metadata_length, uint64_t json_length_u64; uint64_t binary_length_u64; uint64_t header_and_json_length; + uint64_t padding_length; + uint64_t header_and_json_and_padding_length; uint64_t total_length; uint8_t *bytes; - char *blob_start; char *json_start; + char *padding_start; + char *blob_start; if (metadata == NULL || json == NULL || json_length == NULL || blob == NULL || blob_length == NULL) { @@ -181,17 +184,32 @@ tsk_json_struct_metadata_get_blob(char *metadata, tsk_size_t metadata_length, goto out; } header_and_json_length = (uint64_t) TSK_JSON_BINARY_HEADER_SIZE + json_length_u64; - if (binary_length_u64 > UINT64_MAX - header_and_json_length) { + padding_length = (8 - (header_and_json_length & 0x07)) % 8; + if (padding_length > UINT64_MAX - header_and_json_length) { ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH); goto out; } - total_length = header_and_json_length + binary_length_u64; - if ((uint64_t) metadata_length < total_length) { - ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED); + header_and_json_and_padding_length = header_and_json_length + padding_length; + if (binary_length_u64 > UINT64_MAX - header_and_json_and_padding_length) { + ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH); + goto out; + } + total_length = header_and_json_and_padding_length + binary_length_u64; + if ((uint64_t) metadata_length != total_length) { + ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_UNEXPECTED_SIZE); goto out; } + padding_start = (char *) bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64; + for (uint64_t padding_index = 0; padding_index < padding_length; ++padding_index) { + // require padding bytes to be zero, for a bit more safety + if (*(padding_start + padding_index) != (char) 0) { + ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_NONZERO_PADDING); + goto out; + } + } json_start = (char *) bytes + TSK_JSON_BINARY_HEADER_SIZE; - blob_start = (char *) bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64; + blob_start = (char *) bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64 + + padding_length; *json = json_start; *json_length = (tsk_size_t) json_length_u64; *blob = blob_start; @@ -283,6 +301,14 @@ tsk_strerror_internal(int err) ret = "JSON binary struct metadata uses an unsupported version number. " "(TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION)"; break; + case TSK_ERR_JSON_STRUCT_METADATA_UNEXPECTED_SIZE: + ret = "JSON binary struct metadata is not equal to the expected size. " + "(TSK_ERR_JSON_STRUCT_METADATA_UNEXPECTED_SIZE)"; + break; + case TSK_ERR_JSON_STRUCT_METADATA_NONZERO_PADDING: + ret = "JSON binary struct metadata has non-zero padding bytes between the " + "JSON and struct. (TSK_ERR_JSON_STRUCT_METADATA_NONZERO_PADDING)"; + break; /* Out of bounds errors */ case TSK_ERR_BAD_OFFSET: diff --git a/c/tskit/core.h b/c/tskit/core.h index 2964e3d8f1..30f1c77ab0 100644 --- a/c/tskit/core.h +++ b/c/tskit/core.h @@ -329,6 +329,16 @@ A length field in the JSON binary struct metadata header is invalid. The JSON binary struct metadata uses an unsupported version number. */ #define TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION -109 + +/** +The JSON binary struct metadata is not equal to the expected size. +*/ +#define TSK_ERR_JSON_STRUCT_METADATA_UNEXPECTED_SIZE -110 + +/** +The JSON binary struct metadata padding bytes are not zeroed. +*/ +#define TSK_ERR_JSON_STRUCT_METADATA_NONZERO_PADDING -111 /** @} */ /** @@ -1136,10 +1146,11 @@ int tsk_generate_uuid(char *dest, int flags); @brief Extract the binary payload from ``json+struct`` encoded metadata. @rst -Metadata produced by the JSONStructCodec consists of a fixed-size -header followed by canonical JSON bytes and an optional binary payload. This helper -validates the framing, returning pointers to the embedded JSON and binary sections -without copying. +Metadata produced by the JSONStructCodec consists of a fixed-size header followed +by canonical JSON bytes, a variable number of padding bytes (which should be zero) +to bring the length to a multiple of 8 bytes for alignment, and finally an optional +binary payload. This helper validates the framing, returning pointers to the +embedded JSON and binary sections without copying. The output pointers reference memory owned by the caller and remain valid only while the original metadata buffer is alive.