Skip to content

Commit ea7bdf0

Browse files
authored
Merge pull request #36 from benoitc/feature/explicit-bytes-conversion
Add explicit bytes conversion with {bytes, Binary} tuple
2 parents 2b0e7d3 + 522ebbe commit ea7bdf0

File tree

3 files changed

+92
-2
lines changed

3 files changed

+92
-2
lines changed

c_src/py_convert.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,27 @@ static PyObject *term_to_py(ErlNifEnv *env, ERL_NIF_TERM term) {
520520
return PyBytes_FromStringAndSize((char *)bin.data, bin.size);
521521
}
522522

523+
/*
524+
* Check for {bytes, Binary} tagged tuple - explicit bytes conversion.
525+
* This allows users to explicitly send raw bytes without UTF-8 decoding.
526+
*/
527+
{
528+
int tuple_arity;
529+
const ERL_NIF_TERM *tuple_elements;
530+
if (enif_get_tuple(env, term, &tuple_arity, &tuple_elements) && tuple_arity == 2) {
531+
char tag_buf[16];
532+
if (enif_get_atom(env, tuple_elements[0], tag_buf, sizeof(tag_buf), ERL_NIF_LATIN1)) {
533+
if (strcmp(tag_buf, "bytes") == 0) {
534+
ErlNifBinary bytes_bin;
535+
if (enif_inspect_binary(env, tuple_elements[1], &bytes_bin)) {
536+
return PyBytes_FromStringAndSize((char *)bytes_bin.data, bytes_bin.size);
537+
}
538+
/* Not a binary - fall through to normal tuple handling */
539+
}
540+
}
541+
}
542+
}
543+
523544
/* Check list (must come after binary to preserve structure) */
524545
if (enif_get_list_length(env, term, &list_len)) {
525546
PyObject *list = PyList_New(list_len);

docs/type-conversion.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ When calling Python functions or evaluating expressions, Erlang values are autom
1111
| `integer()` | `int` | Arbitrary precision supported |
1212
| `float()` | `float` | IEEE 754 double precision |
1313
| `binary()` | `str` | UTF-8 encoded |
14+
| `{bytes, binary()}` | `bytes` | Explicit bytes (no UTF-8 decode) |
1415
| `atom()` | `str` | Converted to string (except special atoms) |
1516
| `true` | `True` | Boolean |
1617
| `false` | `False` | Boolean |
@@ -56,6 +57,38 @@ py:call(mymod, func, [{1, 2, 3}]). %% Python receives: (1, 2, 3)
5657
py:call(mymod, func, [#{a => 1, b => 2}]). %% Python receives: {"a": 1, "b": 2}
5758
```
5859

60+
### Explicit Bytes Conversion
61+
62+
By default, Erlang binaries are converted to Python `str` using UTF-8 decoding.
63+
To explicitly send raw bytes without string conversion, use the `{bytes, Binary}` tuple:
64+
65+
```erlang
66+
%% Default: binary -> str
67+
py:call(mymod, func, [<<"hello">>]). %% Python sees: "hello" (str)
68+
69+
%% Explicit: {bytes, binary} -> bytes
70+
py:call(mymod, func, [{bytes, <<"hello">>}]). %% Python sees: b"hello" (bytes)
71+
72+
%% Useful for binary protocols, images, compressed data
73+
py:call(image_processor, load, [{bytes, ImageData}]).
74+
```
75+
76+
This is useful when you need to ensure binary data is treated as raw bytes in Python,
77+
for example when working with binary protocols, image data, or compressed content.
78+
79+
Note that on the return path, both Python `str` and `bytes` become Erlang `binary()`:
80+
81+
```erlang
82+
%% Python str -> Erlang binary
83+
{ok, <<"hello">>} = py:eval(<<"'hello'">>).
84+
85+
%% Python bytes -> Erlang binary
86+
{ok, <<"hello">>} = py:eval(<<"b'hello'">>).
87+
88+
%% Non-UTF8 bytes also work
89+
{ok, <<255, 254>>} = py:eval(<<"b'\\xff\\xfe'">>).
90+
```
91+
5992
## Python to Erlang
6093

6194
Return values from Python are converted back to Erlang:

test/py_api_SUITE.erl

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@
2929
test_context_management/1,
3030
test_start_stop_contexts/1,
3131
%% Mixed usage
32-
test_mixed_api_usage/1
32+
test_mixed_api_usage/1,
33+
%% Type conversion
34+
test_explicit_bytes/1
3335
]).
3436

3537
%% ============================================================================
@@ -52,7 +54,9 @@ all() ->
5254
test_context_management,
5355
test_start_stop_contexts,
5456
%% Mixed usage
55-
test_mixed_api_usage
57+
test_mixed_api_usage,
58+
%% Type conversion
59+
test_explicit_bytes
5660
].
5761

5862
init_per_suite(Config) ->
@@ -223,3 +227,35 @@ test_mixed_api_usage(_Config) ->
223227
%% Both should work correctly
224228
{ok, 6} = py:eval(<<"2 + 4">>),
225229
{ok, 7} = py:eval(Ctx, <<"3 + 4">>, #{}).
230+
231+
%% ============================================================================
232+
%% Type Conversion Tests
233+
%% ============================================================================
234+
235+
%% @doc Test explicit bytes conversion using the {bytes, Binary} tagged tuple.
%%
%% A bare binary must reach Python as `str', while {bytes, Binary} must reach
%% Python as `bytes' with identical contents, including payloads that are not
%% valid UTF-8. A {bytes, Term} tuple whose second element is not a binary is
%% expected to be converted like any other 2-tuple.
test_explicit_bytes(_Config) ->
    Ctx = py:context(1),

    %% Define the Python helpers. The embedded source must start at column 0
    %% and keep its own indentation: Python rejects an unindented `def' body.
    ok = py:exec(Ctx, <<"
def check_type(val):
    return type(val).__name__

def check_bytes_value(val):
    return val == b'hello'
">>),

    %% Regular binary -> str (default UTF-8 decoding)
    {ok, <<"str">>} =
        py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => <<"hello">>}),

    %% Explicit bytes tuple -> bytes
    {ok, <<"bytes">>} =
        py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => {bytes, <<"hello">>}}),

    %% Verify the payload is passed through unchanged
    {ok, true} =
        py:eval(Ctx, <<"check_bytes_value(val)">>, #{<<"val">> => {bytes, <<"hello">>}}),

    %% Non-UTF8 data: check both the type and the exact byte values, so a
    %% lossy or re-encoded conversion cannot pass unnoticed.
    NonUtf8 = <<255, 254, 0, 1>>,
    {ok, <<"bytes">>} =
        py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => {bytes, NonUtf8}}),
    {ok, true} =
        py:eval(Ctx, <<"val == bytes([255, 254, 0, 1])">>, #{<<"val">> => {bytes, NonUtf8}}),

    %% A `bytes' tag with a non-binary payload is not special-cased; it is
    %% converted as an ordinary tuple.
    {ok, <<"tuple">>} =
        py:eval(Ctx, <<"check_type(val)">>, #{<<"val">> => {bytes, not_a_binary}}),

    ok.

0 commit comments

Comments
 (0)