Skip to content

Commit c8356fc

Browse files
authored
Prevent data copy in VideoFrame.to_ndarray() for padded frames (#2190)
1 parent: 20f6b7e; commit: c8356fc

File tree

2 files changed: 57 additions (+57) and 39 deletions (-39).

av/video/frame.py

Lines changed: 56 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -374,9 +374,13 @@ class PictureType(IntEnum):
374374
BI = lib.AV_PICTURE_TYPE_BI # BI type
375375

376376

377+
_is_big_endian = cython.declare(cython.bint, sys.byteorder == "big")
378+
379+
377380
@cython.cfunc
381+
@cython.inline
378382
def byteswap_array(array, big_endian: cython.bint):
379-
if (sys.byteorder == "big") != big_endian:
383+
if _is_big_endian != big_endian:
380384
return array.byteswap()
381385
return array
382386

@@ -429,23 +433,31 @@ def copy_array_to_plane(array, plane: VideoPlane, bytes_per_pixel: cython.uint):
429433

430434

431435
@cython.cfunc
436+
@cython.inline
432437
def useful_array(
433438
plane: VideoPlane, bytes_per_pixel: cython.uint = 1, dtype: str = "uint8"
434439
):
435440
"""
436-
Return the useful part of the VideoPlane as a single dimensional array.
441+
Return the useful part of the VideoPlane as a strided array.
437442
438-
We are simply discarding any padding which was added for alignment.
443+
We are simply creating a view that discards any padding which was added for
444+
alignment.
439445
"""
440446
import numpy as np
441447

442-
total_line_size: cython.size_t = abs(plane.line_size)
443-
useful_line_size: cython.size_t = plane.width * bytes_per_pixel
444-
if total_line_size == useful_line_size:
445-
return np.frombuffer(plane, dtype=dtype)
446-
arr = np.frombuffer(plane, np.uint8)
447-
arr = arr.reshape(-1, total_line_size)[:, 0:useful_line_size].reshape(-1)
448-
return arr.view(np.dtype(dtype))
448+
dtype_obj = np.dtype(dtype)
449+
total_line_size = abs(plane.frame.ptr.linesize[plane.index])
450+
itemsize = dtype_obj.itemsize
451+
channels = bytes_per_pixel // itemsize
452+
453+
if channels == 1:
454+
shape = (plane.height, plane.width)
455+
strides = (total_line_size, itemsize)
456+
else:
457+
shape = (plane.height, plane.width, channels)
458+
strides = (total_line_size, bytes_per_pixel, itemsize)
459+
460+
return np.ndarray(shape, dtype=dtype_obj, buffer=plane, strides=strides)
449461

450462

451463
@cython.cfunc
@@ -527,6 +539,8 @@ def planes(self):
527539
plane_count: cython.int = 0
528540
while plane_count < max_plane_count and self.ptr.extended_data[plane_count]:
529541
plane_count += 1
542+
if plane_count == 1:
543+
return (VideoPlane(self, 0),)
530544
return tuple([VideoPlane(self, i) for i in range(plane_count)])
531545

532546
@property
@@ -744,49 +758,50 @@ def to_ndarray(self, channel_last=False, **kwargs):
744758

745759
# check size
746760
format_name = frame.format.name
747-
height, width = frame.ptr.height, frame.ptr.width
748761
planes: tuple[VideoPlane, ...] = frame.planes
749-
if format_name in {"yuv420p", "yuvj420p", "yuyv422", "yuv422p10le", "yuv422p"}:
750-
assert width % 2 == 0, "the width has to be even for this pixel format"
751-
assert height % 2 == 0, "the height has to be even for this pixel format"
752-
753762
# cases planes are simply concatenated in shape (height, width, channels)
754763
if format_name in _np_pix_fmt_dtypes:
764+
if format_name == "yuyv422":
765+
assert frame.ptr.width % 2 == 0, "width has to be even for yuyv422"
766+
assert frame.ptr.height % 2 == 0, "height has to be even for yuyv422"
755767
itemsize: cython.uint
756768
itemsize, dtype = _np_pix_fmt_dtypes[format_name]
757-
if len(planes) == 1: # shortcut, avoid memory copy
758-
array = useful_array(planes[0], itemsize, dtype).reshape(
759-
height, width, -1
760-
)
769+
num_planes: cython.size_t = len(planes)
770+
if num_planes == 1: # shortcut, avoid memory copy
771+
array = useful_array(planes[0], itemsize, dtype)
761772
else: # general case
762-
array = np.empty((height, width, len(planes)), dtype=dtype)
763-
for i, plane in enumerate(planes):
764-
array[:, :, i] = useful_array(plane, itemsize, dtype).reshape(
765-
height, width
766-
)
773+
array = np.empty(
774+
(frame.ptr.height, frame.ptr.width, num_planes), dtype=dtype
775+
)
776+
if format_name.startswith("gbr"):
777+
plane_indices = (2, 0, 1, *range(3, num_planes))
778+
else:
779+
plane_indices = range(num_planes)
780+
for i, p_idx in enumerate(plane_indices):
781+
array[:, :, i] = useful_array(planes[p_idx], itemsize, dtype)
767782
array = byteswap_array(array, format_name.endswith("be"))
768-
if array.shape[2] == 1: # skip last channel for gray images
769-
return array.squeeze(2)
770-
if format_name.startswith("gbr"): # gbr -> rgb
771-
array[:, :, :3] = array[:, :, [2, 0, 1]]
772783
if not channel_last and format_name in {"yuv444p", "yuvj444p"}:
773784
array = np.moveaxis(array, 2, 0)
774785
return array
775786

776787
# special cases
777788
if format_name in {"yuv420p", "yuvj420p", "yuv422p"}:
789+
assert frame.ptr.width % 2 == 0, "width has to be even for this format"
790+
assert frame.ptr.height % 2 == 0, "height has to be even for this format"
778791
return np.hstack(
779792
[
780-
useful_array(planes[0]),
781-
useful_array(planes[1]),
782-
useful_array(planes[2]),
793+
useful_array(planes[0]).reshape(-1),
794+
useful_array(planes[1]).reshape(-1),
795+
useful_array(planes[2]).reshape(-1),
783796
]
784-
).reshape(-1, width)
797+
).reshape(-1, frame.ptr.width)
785798
if format_name == "yuv422p10le":
799+
assert frame.ptr.width % 2 == 0, "width has to be even for this format"
800+
assert frame.ptr.height % 2 == 0, "height has to be even for this format"
786801
# Read planes as uint16 at their original width
787-
y = useful_array(planes[0], 2, "uint16").reshape(height, width)
788-
u = useful_array(planes[1], 2, "uint16").reshape(height, width // 2)
789-
v = useful_array(planes[2], 2, "uint16").reshape(height, width // 2)
802+
y = useful_array(planes[0], 2, "uint16")
803+
u = useful_array(planes[1], 2, "uint16")
804+
v = useful_array(planes[2], 2, "uint16")
790805

791806
# Double the width of U and V by repeating each value
792807
u_full = np.repeat(u, 2, axis=1)
@@ -795,7 +810,7 @@ def to_ndarray(self, channel_last=False, **kwargs):
795810
return np.stack([y, u_full, v_full], axis=2)
796811
return np.stack([y, u_full, v_full], axis=0)
797812
if format_name == "pal8":
798-
image = useful_array(planes[0]).reshape(height, width)
813+
image = useful_array(planes[0])
799814
palette = (
800815
np.frombuffer(planes[1], "i4")
801816
.astype(">i4")
@@ -805,8 +820,11 @@ def to_ndarray(self, channel_last=False, **kwargs):
805820
return image, palette
806821
if format_name == "nv12":
807822
return np.hstack(
808-
[useful_array(planes[0]), useful_array(planes[1], 2)]
809-
).reshape(-1, width)
823+
[
824+
useful_array(planes[0]).reshape(-1),
825+
useful_array(planes[1], 2).reshape(-1),
826+
]
827+
).reshape(-1, frame.ptr.width)
810828

811829
raise ValueError(
812830
f"Conversion to numpy array with format `{format_name}` is not yet supported"

av/video/plane.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def __cinit__(self, frame: VideoFrame, index: cython.int):
2626
frames_ctx.sw_format, frame.ptr.width, frame.ptr.height
2727
)
2828

29-
if fmt.name == "pal8" and index == 1:
29+
if index == 1 and fmt.name == "pal8":
3030
self.width = 256
3131
self.height = 1
3232
self.buffer_size = 256 * 4

0 commit comments

Comments
 (0)