Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

In development

- Add ``tsk_json_struct_metadata_get_blob`` function for locating the JSON and
  binary sections of ``json+struct`` encoded metadata without copying.
  (:user:`benjeffery`, :pr:`3306`)

--------------------
[1.3.1] - 2026-03-06
--------------------
Expand Down
157 changes: 157 additions & 0 deletions c/tests/test_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "testlib.h"
#include <tskit/core.h>
#include <math.h>
#include <string.h>

#include <unistd.h>

Expand Down Expand Up @@ -82,6 +83,161 @@ test_generate_uuid(void)
CU_ASSERT_STRING_NOT_EQUAL(uuid, other_uuid);
}

/* Write ``value`` into the eight bytes starting at ``dest`` in little-endian
 * order (least significant byte first), regardless of host byte order. */
static void
set_u64_le(uint8_t *dest, uint64_t value)
{
    int j;

    for (j = 0; j < 8; j++) {
        /* Emit the current low byte, then move the next byte into place. */
        dest[j] = (uint8_t) (value & 0xFF);
        value >>= 8;
    }
}

/* Exercise tsk_json_struct_metadata_get_blob on hand-built frames.
 *
 * A single 128 byte buffer is reused throughout: each scenario patches the
 * header fields or lengths left behind by the previous one, so the order of
 * the scenarios matters. Success cases check that the returned pointers alias
 * the input buffer; failure cases check the specific error code returned. */
static void
test_json_struct_metadata_get_blob(void)
{
int ret;
char metadata[128];
const char *json;
tsk_size_t json_buffer_length;
const char *blob;
tsk_size_t blob_length;
uint8_t *bytes;
tsk_size_t metadata_length;
size_t header_length;
size_t json_length;
size_t payload_length;
size_t total_length;
const char json_payload[] = "{\"a\":1}";
const uint8_t binary_payload[] = { 0x01, 0x02, 0x03, 0x04 };
/* Despite the name this is a one byte payload holding a single zero. */
const uint8_t empty_payload[] = { 0 };

/* Case 1: well-formed frame with both a JSON section and a binary payload. */
bytes = (uint8_t *) metadata;
/* Header layout: 4 magic bytes + 1 version byte + two 8 byte lengths. */
header_length = 4 + 1 + 8 + 8;
json_length = strlen(json_payload);
payload_length = sizeof(binary_payload);
total_length = header_length + json_length + payload_length;
CU_ASSERT_FATAL(total_length <= sizeof(metadata));
memset(metadata, 0, sizeof(metadata));
bytes[0] = 'J';
bytes[1] = 'B';
bytes[2] = 'L';
bytes[3] = 'B';
bytes[4] = 1;
set_u64_le(bytes + 5, (uint64_t) json_length);
set_u64_le(bytes + 13, (uint64_t) payload_length);
memcpy(bytes + header_length, json_payload, json_length);
memcpy(bytes + header_length + json_length, binary_payload, payload_length);
metadata_length = (tsk_size_t) total_length;
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, 0);
/* The returned pointers must point into the input buffer (no copy). */
CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
if (json_length > 0) {
CU_ASSERT_EQUAL(memcmp(json, json_payload, json_length), 0);
}
CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
CU_ASSERT_EQUAL(memcmp(blob, binary_payload, payload_length), 0);

/* Case 2: same JSON, zero length payload. The blob pointer is still expected
 * to be the position immediately after the JSON bytes. */
payload_length = 0;
total_length = header_length + json_length + payload_length;
CU_ASSERT_FATAL(total_length <= sizeof(metadata));
set_u64_le(bytes + 13, (uint64_t) payload_length);
metadata_length = (tsk_size_t) total_length;
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);

/* Case 3: zero length JSON followed by a one byte payload, so the JSON and
 * blob pointers both equal the first byte after the header. */
json_length = 0;
payload_length = sizeof(empty_payload);
total_length = header_length + json_length + payload_length;
CU_ASSERT_FATAL(total_length <= sizeof(metadata));
set_u64_le(bytes + 5, (uint64_t) json_length);
set_u64_le(bytes + 13, (uint64_t) payload_length);
memcpy(bytes + header_length + json_length, empty_payload, payload_length);
metadata_length = (tsk_size_t) total_length;
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
CU_ASSERT_EQUAL(memcmp(blob, empty_payload, payload_length), 0);

/* Case 4: buffer one byte too short to hold even the header.
 * NOTE(review): the outputs are reset here but are not inspected after any of
 * the failing calls below, so "outputs untouched on error" is not verified. */
blob = NULL;
blob_length = 0;
json = NULL;
json_buffer_length = 0;
metadata_length = header_length - 1;
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);

/* Case 5: corrupt the first magic byte, then restore it. */
metadata_length = (tsk_size_t) total_length;
bytes[0] = 'X';
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC);
bytes[0] = 'J';

/* Case 6: any version other than 1 is rejected; restore afterwards. */
bytes[4] = 2;
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION);
bytes[4] = 1;

/* Case 7: header is complete but the buffer is one byte shorter than the
 * header's declared lengths require (the frame left over from case 3). */
metadata_length = (tsk_size_t) (total_length - 1);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);

/* Case 8: each pointer argument in turn is NULL. */
ret = tsk_json_struct_metadata_get_blob(
NULL, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, NULL, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, NULL, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, NULL, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

/* Case 9: declared lengths whose sum cannot be represented in 64 bits.
 * Start again from a clean, header-only frame. */
memset(metadata, 0, sizeof(metadata));
bytes[0] = 'J';
bytes[1] = 'B';
bytes[2] = 'L';
bytes[3] = 'B';
bytes[4] = 1;
metadata_length = (tsk_size_t) header_length;

/* Smallest JSON length for which header + json exceeds UINT64_MAX. */
set_u64_le(bytes + 5, UINT64_MAX - (uint64_t) header_length + 1);
set_u64_le(bytes + 13, 0);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);

/* Smallest payload length for which header + json (8) + payload exceeds
 * UINT64_MAX. */
set_u64_le(bytes + 5, 8);
set_u64_le(bytes + 13, UINT64_MAX - (uint64_t) (header_length + 8) + 1);
ret = tsk_json_struct_metadata_get_blob(
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
}

static void
test_double_round(void)
{
Expand Down Expand Up @@ -652,6 +808,7 @@ main(int argc, char **argv)
{ "test_strerror", test_strerror },
{ "test_strerror_kastore", test_strerror_kastore },
{ "test_generate_uuid", test_generate_uuid },
{ "test_json_struct_metadata_get_blob", test_json_struct_metadata_get_blob },
{ "test_double_round", test_double_round },
{ "test_blkalloc", test_blkalloc },
{ "test_unknown_time", test_unknown_time },
Expand Down
98 changes: 97 additions & 1 deletion c/tskit/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
#include <kastore.h>
#include <tskit/core.h>

#define UUID_NUM_BYTES 16
#define UUID_NUM_BYTES 16
/* Size in bytes of the fixed json+struct metadata header: 4 magic bytes,
 * 1 version byte and two 64-bit little-endian length fields (the JSON length
 * at offset 5 followed by the binary length at offset 13). */
#define TSK_JSON_BINARY_HEADER_SIZE 21

/* Magic bytes ("JBLB") expected at the very start of json+struct metadata. */
static const uint8_t _tsk_json_binary_magic[4] = { 'J', 'B', 'L', 'B' };

#if defined(_WIN32)

Expand Down Expand Up @@ -95,6 +98,22 @@ get_random_bytes(uint8_t *buf)

#endif

/* Decode the eight bytes starting at ``p`` as an unsigned 64-bit integer
 * stored in little-endian order. Reads byte by byte, so the result does not
 * depend on host byte order or on the alignment of ``p``. */
static uint64_t
tsk_load_u64_le(const uint8_t *p)
{
    uint64_t value = 0;
    int j;

    /* Walk from the most significant byte (p[7]) down to the least, shifting
     * the bytes already accumulated up by one byte each step. */
    for (j = 7; j >= 0; j--) {
        value = (value << 8) | (uint64_t) p[j];
    }
    return value;
}

/* Generate a new UUID4 using a system-generated source of randomness.
* Note that this function writes a NULL terminator to the end of this
* string, so that the total length of the buffer must be 37 bytes.
Expand All @@ -121,6 +140,67 @@ tsk_generate_uuid(char *dest, int TSK_UNUSED(flags))
out:
return ret;
}

/* Validate the framing of ``json+struct`` metadata and report where its JSON
 * and binary sections lie inside the caller's buffer.
 *
 * The buffer must start with the 4 magic bytes, a version byte equal to 1 and
 * two 64-bit little-endian lengths (JSON at offset 5, binary at offset 13);
 * the JSON bytes follow the header and the binary bytes follow the JSON.
 * Nothing is copied: on success ``*json`` and ``*blob`` point into
 * ``metadata``. The output arguments are written only after every check has
 * passed. Returns 0 on success or a negative TSK_ERR_* code. */
int
tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
    const char **json, tsk_size_t *json_length, const char **blob,
    tsk_size_t *blob_length)
{
    int ret = 0;
    const uint8_t *raw = (const uint8_t *) metadata;
    const uint64_t header_size = (uint64_t) TSK_JSON_BINARY_HEADER_SIZE;
    uint64_t json_size, blob_size, blob_offset;

    if (metadata == NULL || json == NULL || json_length == NULL || blob == NULL
        || blob_length == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    /* The fixed header must be fully present before any field is read. */
    if (metadata_length < header_size) {
        ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);
        goto out;
    }
    if (memcmp(raw, _tsk_json_binary_magic, sizeof(_tsk_json_binary_magic)) != 0) {
        ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC);
        goto out;
    }
    /* Byte 4 holds the format version; only version 1 is understood. */
    if (raw[4] != 1) {
        ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION);
        goto out;
    }

    json_size = tsk_load_u64_le(raw + 5);
    blob_size = tsk_load_u64_le(raw + 13);

    /* Reject lengths whose running sum would wrap around 64 bits, so that the
     * bounds comparison below is made on a meaningful total. */
    if (UINT64_MAX - header_size < json_size) {
        ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
        goto out;
    }
    blob_offset = header_size + json_size;
    if (UINT64_MAX - blob_offset < blob_size) {
        ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
        goto out;
    }
    /* Both sections must fit within the bytes the caller supplied. */
    if ((uint64_t) metadata_length < blob_offset + blob_size) {
        ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);
        goto out;
    }

    *json = metadata + TSK_JSON_BINARY_HEADER_SIZE;
    *json_length = (tsk_size_t) json_size;
    *blob = metadata + blob_offset;
    *blob_length = (tsk_size_t) blob_size;
out:
    return ret;
}
static const char *
tsk_strerror_internal(int err)
{
Expand Down Expand Up @@ -188,6 +268,22 @@ tsk_strerror_internal(int err)
ret = "An incompatible type for a column was found in the file. "
"(TSK_ERR_BAD_COLUMN_TYPE)";
break;
case TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC:
ret = "JSON binary struct metadata does not begin with the expected "
"magic bytes. (TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC)";
break;
case TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED:
ret = "JSON binary struct metadata is shorter than the expected size. "
"(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED)";
break;
case TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH:
ret = "A length field in the JSON binary struct metadata header is invalid. "
"(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH)";
break;
case TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION:
ret = "JSON binary struct metadata uses an unsupported version number. "
"(TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION)";
break;

/* Out of bounds errors */
case TSK_ERR_BAD_OFFSET:
Expand Down
45 changes: 45 additions & 0 deletions c/tskit/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,26 @@ not found in the file.
An unsupported type was provided for a column in the file.
*/
#define TSK_ERR_BAD_COLUMN_TYPE -105

/**
The JSON binary struct metadata does not begin with the expected magic bytes.
*/
#define TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC -106

/**
The JSON binary struct metadata is shorter than the expected size.
*/
#define TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED -107

/**
A length field in the JSON binary struct metadata header is invalid.
*/
#define TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH -108

/**
The JSON binary struct metadata uses an unsupported version number.
*/
#define TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION -109
/** @} */

/**
Expand Down Expand Up @@ -1112,6 +1132,31 @@ bool tsk_isfinite(double val);
#define TSK_UUID_SIZE 36
int tsk_generate_uuid(char *dest, int flags);

/**
@brief Locate the JSON and binary sections of ``json+struct`` encoded metadata.

@rst
Metadata produced by the JSONStructCodec consists of a fixed-size
header followed by canonical JSON bytes and an optional binary payload. This helper
validates the framing, returning pointers to the embedded JSON and binary sections
without copying.

The header is 21 bytes long: the four magic bytes ``JBLB``, a one byte version
(which must be 1), and two unsigned 64 bit little-endian integers giving the length
of the JSON section and the length of the binary payload, in that order.

The output pointers reference memory owned by the caller and remain valid only while
the original metadata buffer is alive. No terminator is added after the JSON bytes, so
``json_length`` must be used to delimit them. If an error is returned, none of the
output arguments are modified.
@endrst

@param[in] metadata Pointer to the encoded metadata bytes.
@param[in] metadata_length Number of bytes available at ``metadata``.
@param[out] json On success, set to the start of the JSON bytes.
@param[out] json_length On success, set to the JSON length in bytes.
@param[out] blob On success, set to the start of the binary payload.
@param[out] blob_length On success, set to the payload length in bytes.
@return Return 0 on success or a negative value on failure:
    ``TSK_ERR_BAD_PARAM_VALUE`` if any pointer argument is NULL;
    ``TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED`` if ``metadata_length`` is smaller
    than the header or than the total size declared by the header;
    ``TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC`` if the magic bytes do not match;
    ``TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION`` if the version is not 1;
    ``TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH`` if the declared lengths
    overflow a 64 bit integer when summed with the header size.
*/
int tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
const char **json, tsk_size_t *json_length, const char **blob,
tsk_size_t *blob_length);

/* TODO most of these can probably be macros so they compile out as no-ops.
* Lets do the 64 bit tsk_size_t switch first though. */
void *tsk_malloc(tsk_size_t size);
Expand Down
1 change: 1 addition & 0 deletions docs/c-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,7 @@ Miscellaneous functions

.. doxygenfunction:: tsk_is_unknown_time

.. doxygenfunction:: tsk_json_struct_metadata_get_blob

*************************
Function Specific Options
Expand Down
4 changes: 4 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

In development

- Add ``json+struct`` metadata codec that allows storing binary data using a struct
schema alongside JSON metadata. (:user:`benjeffery`, :pr:`3306`)

--------------------
[1.0.2] - 2026-03-06
--------------------
Expand Down Expand Up @@ -107,6 +110,7 @@ Maintenance release.
also around 10% faster.
(:user:`benjeffery`, :pr:`3313`, :pr:`3317`, :issue:`1896`)


**Bugfixes**

- In some tables with mutations out-of-order ``TableCollection.sort`` did not re-order
Expand Down
Loading