@@ -374,9 +374,13 @@ class PictureType(IntEnum):
374374 BI = lib .AV_PICTURE_TYPE_BI # BI type
375375
376376
# Host byte order, evaluated once at import time so that byteswap_array does
# not have to query sys.byteorder on every call.
_is_big_endian = cython.declare(cython.bint, sys.byteorder == "big")


@cython.cfunc
@cython.inline
def byteswap_array(array, big_endian: cython.bint):
    """
    Return ``array`` with its bytes swapped when its byte order is not the host's.

    :param array: object exposing a ``byteswap()`` method (presumably a NumPy
        array; confirm against callers).
    :param big_endian: whether the data held by ``array`` is stored big-endian.
    :return: ``array.byteswap()`` when ``big_endian`` disagrees with the host
        byte order, otherwise ``array`` itself, unchanged.
    """
    if _is_big_endian != big_endian:
        return array.byteswap()
    return array
382386
@@ -429,23 +433,31 @@ def copy_array_to_plane(array, plane: VideoPlane, bytes_per_pixel: cython.uint):
429433
430434
@cython.cfunc
@cython.inline
def useful_array(
    plane: VideoPlane, bytes_per_pixel: cython.uint = 1, dtype: str = "uint8"
):
    """
    Return the useful part of the VideoPlane as a strided array.

    We are simply creating a view that discards any padding which was added for
    alignment.

    :param plane: the plane to expose; it is handed to NumPy as the backing
        buffer, so no pixel data is copied here.
    :param bytes_per_pixel: number of bytes occupied by one pixel of the plane.
    :param dtype: NumPy dtype name of a single component.
    :return: an array of shape ``(plane.height, plane.width)`` when one pixel is
        exactly one ``dtype`` item, otherwise
        ``(plane.height, plane.width, bytes_per_pixel // itemsize)``.
    """
    import numpy as np

    dtype_obj = np.dtype(dtype)
    # Distance in bytes between the starts of two consecutive rows, padding
    # included; abs() because the stored line size may be negative.
    total_line_size = abs(plane.frame.ptr.linesize[plane.index])
    itemsize = dtype_obj.itemsize
    # Number of dtype items packed into one pixel.
    channels = bytes_per_pixel // itemsize

    if channels == 1:
        shape = (plane.height, plane.width)
        strides = (total_line_size, itemsize)
    else:
        shape = (plane.height, plane.width, channels)
        strides = (total_line_size, bytes_per_pixel, itemsize)

    # The row stride skips the alignment padding at the end of each line, so the
    # resulting view only covers the useful bytes.
    return np.ndarray(shape, dtype=dtype_obj, buffer=plane, strides=strides)
449461
450462
451463@cython .cfunc
@@ -527,6 +539,8 @@ def planes(self):
527539 plane_count : cython .int = 0
528540 while plane_count < max_plane_count and self .ptr .extended_data [plane_count ]:
529541 plane_count += 1
542+ if plane_count == 1 :
543+ return (VideoPlane (self , 0 ),)
530544 return tuple ([VideoPlane (self , i ) for i in range (plane_count )])
531545
532546 @property
@@ -744,49 +758,50 @@ def to_ndarray(self, channel_last=False, **kwargs):
744758
745759 # check size
746760 format_name = frame .format .name
747- height , width = frame .ptr .height , frame .ptr .width
748761 planes : tuple [VideoPlane , ...] = frame .planes
749- if format_name in {"yuv420p" , "yuvj420p" , "yuyv422" , "yuv422p10le" , "yuv422p" }:
750- assert width % 2 == 0 , "the width has to be even for this pixel format"
751- assert height % 2 == 0 , "the height has to be even for this pixel format"
752-
753762 # cases planes are simply concatenated in shape (height, width, channels)
754763 if format_name in _np_pix_fmt_dtypes :
764+ if format_name == "yuyv422" :
765+ assert frame .ptr .width % 2 == 0 , "width has to be even for yuyv422"
766+ assert frame .ptr .height % 2 == 0 , "height has to be even for yuyv422"
755767 itemsize : cython .uint
756768 itemsize , dtype = _np_pix_fmt_dtypes [format_name ]
757- if len (planes ) == 1 : # shortcut, avoid memory copy
758- array = useful_array (planes [0 ], itemsize , dtype ).reshape (
759- height , width , - 1
760- )
769+ num_planes : cython .size_t = len (planes )
770+ if num_planes == 1 : # shortcut, avoid memory copy
771+ array = useful_array (planes [0 ], itemsize , dtype )
761772 else : # general case
762- array = np .empty ((height , width , len (planes )), dtype = dtype )
763- for i , plane in enumerate (planes ):
764- array [:, :, i ] = useful_array (plane , itemsize , dtype ).reshape (
765- height , width
766- )
773+ array = np .empty (
774+ (frame .ptr .height , frame .ptr .width , num_planes ), dtype = dtype
775+ )
776+ if format_name .startswith ("gbr" ):
777+ plane_indices = (2 , 0 , 1 , * range (3 , num_planes ))
778+ else :
779+ plane_indices = range (num_planes )
780+ for i , p_idx in enumerate (plane_indices ):
781+ array [:, :, i ] = useful_array (planes [p_idx ], itemsize , dtype )
767782 array = byteswap_array (array , format_name .endswith ("be" ))
768- if array .shape [2 ] == 1 : # skip last channel for gray images
769- return array .squeeze (2 )
770- if format_name .startswith ("gbr" ): # gbr -> rgb
771- array [:, :, :3 ] = array [:, :, [2 , 0 , 1 ]]
772783 if not channel_last and format_name in {"yuv444p" , "yuvj444p" }:
773784 array = np .moveaxis (array , 2 , 0 )
774785 return array
775786
776787 # special cases
777788 if format_name in {"yuv420p" , "yuvj420p" , "yuv422p" }:
789+ assert frame .ptr .width % 2 == 0 , "width has to be even for this format"
790+ assert frame .ptr .height % 2 == 0 , "height has to be even for this format"
778791 return np .hstack (
779792 [
780- useful_array (planes [0 ]),
781- useful_array (planes [1 ]),
782- useful_array (planes [2 ]),
793+ useful_array (planes [0 ]). reshape ( - 1 ) ,
794+ useful_array (planes [1 ]). reshape ( - 1 ) ,
795+ useful_array (planes [2 ]). reshape ( - 1 ) ,
783796 ]
784- ).reshape (- 1 , width )
797+ ).reshape (- 1 , frame . ptr . width )
785798 if format_name == "yuv422p10le" :
799+ assert frame .ptr .width % 2 == 0 , "width has to be even for this format"
800+ assert frame .ptr .height % 2 == 0 , "height has to be even for this format"
786801 # Read planes as uint16 at their original width
787- y = useful_array (planes [0 ], 2 , "uint16" ). reshape ( height , width )
788- u = useful_array (planes [1 ], 2 , "uint16" ). reshape ( height , width // 2 )
789- v = useful_array (planes [2 ], 2 , "uint16" ). reshape ( height , width // 2 )
802+ y = useful_array (planes [0 ], 2 , "uint16" )
803+ u = useful_array (planes [1 ], 2 , "uint16" )
804+ v = useful_array (planes [2 ], 2 , "uint16" )
790805
791806 # Double the width of U and V by repeating each value
792807 u_full = np .repeat (u , 2 , axis = 1 )
@@ -795,7 +810,7 @@ def to_ndarray(self, channel_last=False, **kwargs):
795810 return np .stack ([y , u_full , v_full ], axis = 2 )
796811 return np .stack ([y , u_full , v_full ], axis = 0 )
797812 if format_name == "pal8" :
798- image = useful_array (planes [0 ]). reshape ( height , width )
813+ image = useful_array (planes [0 ])
799814 palette = (
800815 np .frombuffer (planes [1 ], "i4" )
801816 .astype (">i4" )
@@ -805,8 +820,11 @@ def to_ndarray(self, channel_last=False, **kwargs):
805820 return image , palette
806821 if format_name == "nv12" :
807822 return np .hstack (
808- [useful_array (planes [0 ]), useful_array (planes [1 ], 2 )]
809- ).reshape (- 1 , width )
823+ [
824+ useful_array (planes [0 ]).reshape (- 1 ),
825+ useful_array (planes [1 ], 2 ).reshape (- 1 ),
826+ ]
827+ ).reshape (- 1 , frame .ptr .width )
810828
811829 raise ValueError (
812830 f"Conversion to numpy array with format `{ format_name } ` is not yet supported"
0 commit comments