From 526fbac0466dd725344bdc58c190d2fec0e36495 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 17 Mar 2026 16:47:42 +0100 Subject: [PATCH 1/2] Add explicit bytes conversion with {bytes, Binary} tuple Erlang binaries are converted to Python str by default (UTF-8 decoded). This adds support for {bytes, Binary} tagged tuple to explicitly convert to Python bytes without UTF-8 decoding. Usage: py:call(mod, func, [{bytes, <<"data">>}]) %% Python receives: b"data" Useful for binary protocols, images, and compressed data. --- c_src/py_convert.c | 21 +++++++++++++++++++++ docs/type-conversion.md | 20 ++++++++++++++++++++ test/py_api_SUITE.erl | 40 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/c_src/py_convert.c b/c_src/py_convert.c index 1257e0e..392abec 100644 --- a/c_src/py_convert.c +++ b/c_src/py_convert.c @@ -520,6 +520,27 @@ static PyObject *term_to_py(ErlNifEnv *env, ERL_NIF_TERM term) { return PyBytes_FromStringAndSize((char *)bin.data, bin.size); } + /* + * Check for {bytes, Binary} tagged tuple - explicit bytes conversion. + * This allows users to explicitly send raw bytes without UTF-8 decoding. + */ + { + int tuple_arity; + const ERL_NIF_TERM *tuple_elements; + if (enif_get_tuple(env, term, &tuple_arity, &tuple_elements) && tuple_arity == 2) { + char tag_buf[16]; + if (enif_get_atom(env, tuple_elements[0], tag_buf, sizeof(tag_buf), ERL_NIF_LATIN1)) { + if (strcmp(tag_buf, "bytes") == 0) { + ErlNifBinary bytes_bin; + if (enif_inspect_binary(env, tuple_elements[1], &bytes_bin)) { + return PyBytes_FromStringAndSize((char *)bytes_bin.data, bytes_bin.size); + } + /* Not a binary - fall through to normal tuple handling */ + } + } + } + } + /* Check list (must come after binary to preserve structure) */ if (enif_get_list_length(env, term, &list_len)) { PyObject *list = PyList_New(list_len); diff --git a/docs/type-conversion.md b/docs/type-conversion.md index 618eb6b..ef8b4ff 100644 --- a/docs/type-conversion.md +++ b/docs/type-conversion.md @@ -11,6 +11,7 @@ When calling Python functions or evaluating expressions, Erlang values are autom | `integer()` | `int` | Arbitrary precision supported | | `float()` | `float` | IEEE 754 double precision | | `binary()` | `str` | UTF-8 encoded | +| `{bytes, binary()}` | `bytes` | Explicit bytes (no UTF-8 decode) | | `atom()` | `str` | Converted to string (except special atoms) | | `true` | `True` | Boolean | | `false` | `False` | Boolean | @@ -56,6 +57,25 @@ py:call(mymod, func, [{1, 2, 3}]). %% Python receives: (1, 2, 3) py:call(mymod, func, [#{a => 1, b => 2}]). %% Python receives: {"a": 1, "b": 2} ``` +### Explicit Bytes Conversion + +By default, Erlang binaries are converted to Python `str` using UTF-8 decoding. +To explicitly send raw bytes without string conversion, use the `{bytes, Binary}` tuple: + +```erlang +%% Default: binary -> str +py:call(mymod, func, [<<"hello">>]). %% Python sees: "hello" (str) + +%% Explicit: {bytes, binary} -> bytes +py:call(mymod, func, [{bytes, <<"hello">>}]). %% Python sees: b"hello" (bytes) + +%% Useful for binary protocols, images, compressed data +py:call(image_processor, load, [{bytes, ImageData}]). +``` + +This is useful when you need to ensure binary data is treated as raw bytes in Python, +for example when working with binary protocols, image data, or compressed content. + ## Python to Erlang Return values from Python are converted back to Erlang: diff --git a/test/py_api_SUITE.erl b/test/py_api_SUITE.erl index ad46953..3e8c49f 100644 --- a/test/py_api_SUITE.erl +++ b/test/py_api_SUITE.erl @@ -29,7 +29,9 @@ test_context_management/1, test_start_stop_contexts/1, %% Mixed usage - test_mixed_api_usage/1 + test_mixed_api_usage/1, + %% Type conversion + test_explicit_bytes/1 ]). %% ============================================================================ @@ -52,7 +54,9 @@ all() -> test_context_management, test_start_stop_contexts, %% Mixed usage - test_mixed_api_usage + test_mixed_api_usage, + %% Type conversion + test_explicit_bytes ]. init_per_suite(Config) -> @@ -223,3 +227,35 @@ test_mixed_api_usage(_Config) -> %% Both should work correctly {ok, 6} = py:eval(<<"2 + 4">>), {ok, 7} = py:eval(Ctx, <<"3 + 4">>, #{}). + +%% ============================================================================ +%% Type Conversion Tests +%% ============================================================================ + +%% @doc Test explicit bytes conversion using {bytes, Binary} tuple. +test_explicit_bytes(_Config) -> + Ctx = py:context(1), + + %% Define test functions + ok = py:exec(Ctx, <<" +def check_type(val): + return type(val).__name__ + +def check_bytes_value(val): + return val == b'hello' +">>), + + %% Regular binary -> str (default UTF-8 decoding) + {ok, <<"str">>} = py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => <<"hello">>}), + + %% Explicit bytes tuple -> bytes + {ok, <<"bytes">>} = py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => {bytes, <<"hello">>}}), + + %% Verify value is correct + {ok, true} = py:eval(Ctx, <<"check_bytes_value(val)">>, #{<<"val">> => {bytes, <<"hello">>}}), + + %% Test with binary data (non-UTF8) + NonUtf8 = <<255, 254, 0, 1>>, + {ok, <<"bytes">>} = py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => {bytes, NonUtf8}}), + + ok. From 522ebbe0184ded6e2dbf801cf992bc6b3f94820a Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Tue, 17 Mar 2026 16:52:25 +0100 Subject: [PATCH 2/2] Document round-trip behavior for bytes conversion --- docs/type-conversion.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/type-conversion.md b/docs/type-conversion.md index ef8b4ff..c23e196 100644 --- a/docs/type-conversion.md +++ b/docs/type-conversion.md @@ -76,6 +76,19 @@ py:call(image_processor, load, [{bytes, ImageData}]). This is useful when you need to ensure binary data is treated as raw bytes in Python, for example when working with binary protocols, image data, or compressed content. +Note that on the return path, both Python `str` and `bytes` become Erlang `binary()`: + +```erlang +%% Python str -> Erlang binary +{ok, <<"hello">>} = py:eval(<<"'hello'">>). + +%% Python bytes -> Erlang binary +{ok, <<"hello">>} = py:eval(<<"b'hello'">>). + +%% Non-UTF8 bytes also work +{ok, <<255, 254>>} = py:eval(<<"b'\\xff\\xfe'">>). +``` + ## Python to Erlang Return values from Python are converted back to Erlang: