@@ -2456,6 +2456,121 @@ def GetBinaryFileType(infile, filestart=0, closefp=True):
24562456 # -------------- FALLBACK --------------
24572457 return False
24582458
def _get_seek_consts():
    """
    Look up the hole-aware lseek() whence constants exposed by the os module.

    Returns: (SEEK_DATA, SEEK_HOLE) when the os module defines both of them,
             otherwise (None, None) so callers can test a single condition.
    """
    found = tuple(getattr(os, name, None) for name in ("SEEK_DATA", "SEEK_HOLE"))
    # A half-supported platform is treated the same as an unsupported one.
    if None in found:
        return None, None
    return found
2466+
def pack_sparse_to_stream(path, out_fp, bufsize=1024 * 1024):
    """
    Write ONLY the data extents of sparse file `path` into `out_fp`.

    When the os module exposes SEEK_DATA/SEEK_HOLE the extent map is taken
    from lseek(); otherwise (or when lseek() rejects those whence values for
    this file) the file is scanned in 4 KiB blocks and every block that holds
    a non-zero byte is treated as data. The scan cannot tell real zeros from
    holes, so all-zero blocks are always dropped in that mode.

    path:    file to read (a symlink is followed, as open() does)
    out_fp:  writable, seekable binary file-like object; it is rewound to
             offset 0 before returning
    bufsize: maximum bytes per read while copying an extent; a non-positive
             value falls back to 1 MiB

    Returns: (logical_size, extents, stored_bytes)
      logical_size: size of the opened file in bytes
      extents:      list of (offset, length) in the logical file, in the
                    same order as the bytes written to out_fp
      stored_bytes: total bytes written to out_fp (== sum of extent lengths)

    Raises: OSError if the file cannot be opened or read.
    """
    import errno  # function-scope: only needed to classify lseek() failures

    bufsize = int(bufsize)
    if bufsize <= 0:
        # read(0) would look like EOF and a negative count would over-read.
        bufsize = 1024 * 1024

    seek_data, seek_hole = _get_seek_consts()
    use_kernel_map = seek_data is not None and seek_hole is not None
    scan_block = 4096
    extents = []
    stored = 0

    with open(path, "rb", buffering=0) as f:
        fd = f.fileno()
        # Size the descriptor we actually read from, so the size and the data
        # always describe the same file (a symlink path reports its target).
        logical_size = int(os.fstat(fd).st_size)
        pos = 0

        # Preferred path: ask the kernel where data and holes are.
        while use_kernel_map and pos < logical_size:
            try:
                data_off = os.lseek(fd, pos, seek_data)
            except OSError as exc:
                if exc.errno != errno.ENXIO:
                    # Not "only a hole is left" but "cannot answer for this
                    # file": scan the remainder instead of dropping its data.
                    use_kernel_map = False
                break
            if data_off >= logical_size:
                break  # file grew after it was sized; ignore the new tail
            try:
                hole_off = os.lseek(fd, data_off, seek_hole)
            except OSError:
                hole_off = logical_size
            hole_off = min(hole_off, logical_size)

            length = hole_off - data_off
            if length <= 0:
                # Defensive: always make progress on an inconsistent answer.
                pos = max(pos + 1, hole_off)
                continue

            # Copy that extent's bytes into out_fp.
            os.lseek(fd, data_off, os.SEEK_SET)
            copied = 0
            while copied < length:
                chunk = f.read(min(bufsize, length - copied))
                if not chunk:
                    break  # file shrank while it was being read
                out_fp.write(chunk)
                copied += len(chunk)

            # Record what was really written so extents always match out_fp.
            if copied:
                extents.append((data_off, copied))
                stored += copied
            if copied < length:
                break
            pos = hole_off

        if not use_kernel_map:
            # Portable fallback: stream non-zero blocks straight to out_fp,
            # merging adjacent ones into a single extent without buffering
            # the whole run in memory.
            os.lseek(fd, pos, os.SEEK_SET)
            run_start = pos
            run_len = 0
            while pos < logical_size:
                chunk = f.read(min(scan_block, logical_size - pos))
                if not chunk:
                    break
                if chunk.count(b"\0") != len(chunk):
                    if not run_len:
                        run_start = pos
                    out_fp.write(chunk)
                    run_len += len(chunk)
                    stored += len(chunk)
                elif run_len:
                    # An all-zero block closes the current run.
                    extents.append((run_start, run_len))
                    run_len = 0
                pos += len(chunk)
            if run_len:
                extents.append((run_start, run_len))

    out_fp.seek(0, os.SEEK_SET)
    return logical_size, extents, stored
2545+
def write_sparse_to_fileobj(out_fp, logical_size, extents, in_fp, bufsize=1024 * 1024):
    """
    Recreate a sparse file layout in an already-open writable file-like object.

    out_fp:       writable, seekable binary file-like object supporting
                  truncate(); any previous content is discarded
    logical_size: final size of the recreated file in bytes
    extents:      iterable of (offset, length) pairs; the bytes for each pair
                  are read consecutively from in_fp, in the given order
    in_fp:        readable binary file-like object positioned at the first
                  extent's data
    bufsize:      maximum bytes per read; a non-positive value falls back to
                  1 MiB

    Raises: ValueError if an extent has a negative offset or length
            EOFError if in_fp ends before all extent data has been read
    """
    size = int(logical_size)
    bufsize = int(bufsize)
    if bufsize <= 0:
        bufsize = 1024 * 1024

    # Validate before touching out_fp: a negative length would otherwise
    # reach read() as a negative count and swallow the rest of the archive.
    plan = []
    for off, length in extents:
        off = int(off)
        length = int(length)
        if off < 0 or length < 0:
            raise ValueError("Sparse extent offset and length must not be negative")
        plan.append((off, length))

    # Drop old content first so holes read back as zeros, then size the file.
    out_fp.seek(0)
    out_fp.truncate(0)
    out_fp.truncate(size)

    # Some file-likes (e.g. io.BytesIO) do not grow on truncate(); pin the
    # final size with one zero byte so a trailing hole is not lost.
    out_fp.seek(0, os.SEEK_END)
    if out_fp.tell() < size:
        out_fp.seek(size - 1, os.SEEK_SET)
        out_fp.write(b"\0")
    out_fp.seek(0, os.SEEK_SET)

    for off, remaining in plan:
        out_fp.seek(off, os.SEEK_SET)
        while remaining > 0:
            chunk = in_fp.read(min(bufsize, remaining))
            if not chunk:
                raise EOFError("Archive ended while reading sparse extent data")
            out_fp.write(chunk)
            remaining -= len(chunk)
2562+
def unpack_sparse_to_path(in_fp, out_path, logical_size, extents, bufsize=1024 * 1024):
    """
    Recreate a sparse file at `out_path` from packed extent data.

    Missing parent directories are created, the file is (re)created in "wb"
    mode, filled through write_sparse_to_fileobj(), and then flushed and
    fsync'ed.

    in_fp:        readable binary file-like object holding the extent data
    out_path:     destination file path
    logical_size: final size of the recreated file in bytes
    extents:      iterable of (offset, length) pairs in the logical file
    bufsize:      maximum bytes per read from in_fp

    Raises: whatever write_sparse_to_fileobj() raises (the partially written
            file is left in place), plus OSError from creating, writing or
            flushing the file.
    """
    parent_dir = os.path.dirname(out_path) or "."
    os.makedirs(parent_dir, exist_ok=True)

    with open(out_path, "wb") as f:
        write_sparse_to_fileobj(f, logical_size, extents, in_fp, bufsize)

        # A failed flush means lost data, so it is allowed to propagate.
        f.flush()
        try:
            os.fsync(f.fileno())
        except OSError:
            # Durability is best-effort: some targets do not support fsync().
            pass
24592574
24602575def _is_valid_zlib_header (cmf , flg ):
24612576 """
@@ -5996,7 +6111,8 @@ def AppendFilesWithContentToList(infiles, dirlistfromtxt=False, extradata=[], js
59966111 # Types that should be considered zero-length in the archive context:
59976112 zero_length_types = {1 , 2 , 3 , 4 , 5 , 6 , 8 , 9 , 10 , 11 , 13 }
59986113 # Types that have actual data to read:
5999- data_types = {0 , 7 , 12 }
6114+ data_types = {0 , 7 }
6115+ sparse_types = {12 }
60006116 if ftype in zero_length_types :
60016117 fsize = format (int ("0" ), 'x' ).lower ()
60026118 elif ftype in data_types :
@@ -6313,7 +6429,8 @@ def AppendFilesWithContentFromTarFileToList(infile, extradata=[], jsondata={}, c
63136429 # Types that should be considered zero-length in the archive context:
63146430 zero_length_types = {1 , 2 , 3 , 4 , 5 , 6 , 8 , 9 , 10 , 11 , 13 }
63156431 # Types that have actual data to read:
6316- data_types = {0 , 7 , 12 }
6432+ data_types = {0 , 7 }
6433+ sparse_types = {12 }
63176434 if ftype in zero_length_types :
63186435 fsize = format (int ("0" ), 'x' ).lower ()
63196436 elif ftype in data_types :
0 commit comments