Skip to content

Commit 8b2008c

Browse files
committed
Small update
1 parent c323e4a commit 8b2008c

File tree

1 file changed

+119
-2
lines changed

1 file changed

+119
-2
lines changed

pyfoxfile/pyfoxfile.py

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2456,6 +2456,121 @@ def GetBinaryFileType(infile, filestart=0, closefp=True):
24562456
# -------------- FALLBACK --------------
24572457
return False
24582458

2459+
def _get_seek_consts():
    """
    Look up the sparse-file seek constants on the os module.

    Returns the pair (os.SEEK_DATA, os.SEEK_HOLE) when both attributes
    exist, otherwise (None, None) so callers can test a single condition.
    """
    found = tuple(getattr(os, name, None) for name in ("SEEK_DATA", "SEEK_HOLE"))
    # A lone constant is useless: both are needed to delimit an extent.
    if any(value is None for value in found):
        return None, None
    return found
2466+
2467+
def pack_sparse_to_stream(path, out_fp, bufsize=1024*1024):
    """
    Write ONLY data extents from sparse file `path` into `out_fp`.

    Extents are located with os.lseek(SEEK_DATA / SEEK_HOLE) when
    _get_seek_consts() reports both constants. When they are missing, or
    when the SEEK_DATA call fails with anything other than ENXIO, the
    (remaining part of the) file is scanned in 4096-byte blocks instead and
    every run of blocks containing a non-zero byte is stored as one extent.
    The block scan cannot tell real zero bytes from holes.

    Args:
        path: path of the file to read.
        out_fp: writable, seekable binary file object receiving the extent
            bytes back to back. It is rewound to offset 0 before returning.
        bufsize: maximum number of bytes per read while copying an extent.

    Returns: (logical_size, extents, stored_bytes)
        logical_size: st_size of `path`
        extents: list of (offset, length) in logical file, in file order;
            each length is the number of bytes actually copied
        stored_bytes: total bytes written to out_fp
    """
    import errno

    # NOTE(review): the size is taken without following symlinks, while
    # open() below does follow them - confirm callers never pass a symlink.
    st = os.stat(path, follow_symlinks=False)
    logical_size = int(st.st_size)
    extents = []
    stored = 0

    SEEK_DATA, SEEK_HOLE = _get_seek_consts()

    # buffering=0 keeps the file object's position identical to the raw
    # descriptor position, so os.lseek() and f.read() can be mixed safely.
    with open(path, "rb", buffering=0) as f:
        fd = f.fileno()
        pos = 0

        # Kernel knows where holes are (best, fastest, exact).
        kernel_scan = SEEK_DATA is not None and SEEK_HOLE is not None
        while kernel_scan and pos < logical_size:
            try:
                data_off = os.lseek(fd, pos, SEEK_DATA)
            except OSError as exc:
                if exc.errno == errno.ENXIO:
                    # No data at or after pos: the rest is one big hole.
                    pos = logical_size
                else:
                    # SEEK_DATA is not usable on this file; do not drop the
                    # remaining bytes, hand them to the block scan below.
                    kernel_scan = False
                break
            try:
                hole_off = os.lseek(fd, data_off, SEEK_HOLE)
            except OSError:
                # Treat everything up to the end of the file as data.
                hole_off = logical_size
            hole_off = min(hole_off, logical_size)

            length = hole_off - data_off
            if length <= 0:
                # Always make progress, even on a degenerate answer.
                pos = max(pos + 1, hole_off)
                continue

            # Copy that extent's bytes into out_fp.
            os.lseek(fd, data_off, os.SEEK_SET)
            copied = 0
            while copied < length:
                chunk = f.read(min(bufsize, length - copied))
                if not chunk:
                    break  # file ended early (e.g. truncated meanwhile)
                out_fp.write(chunk)
                copied += len(chunk)

            # Record what was really written so the extent list always
            # matches the byte stream in out_fp.
            if copied:
                extents.append((data_off, copied))
                stored += copied

            pos = hole_off

        if pos < logical_size:
            # Portable fallback (no usable SEEK_HOLE/DATA): scan for
            # non-zero blocks. Blocks are streamed straight to out_fp so a
            # large dense file is never held in memory.
            block = 4096
            f.seek(pos, os.SEEK_SET)
            run_start = None
            while pos < logical_size:
                # Never read past logical_size so extents stay inside it.
                chunk = f.read(min(block, logical_size - pos))
                if not chunk:
                    break
                if any(chunk):
                    # Block holds at least one non-zero byte: it is data.
                    if run_start is None:
                        run_start = pos
                    out_fp.write(chunk)
                    stored += len(chunk)
                elif run_start is not None:
                    # All-zero block terminates the current run.
                    extents.append((run_start, pos - run_start))
                    run_start = None
                pos += len(chunk)
            if run_start is not None:
                extents.append((run_start, pos - run_start))

    out_fp.seek(0, os.SEEK_SET)
    return logical_size, extents, stored
2545+
2546+
def write_sparse_to_fileobj(out_fp, logical_size, extents, in_fp, bufsize=1024*1024):
    """
    Recreate sparse file layout into an already-open writable file-like object.

    Args:
        out_fp: writable, seekable binary file object to fill.
        logical_size: final size of the recreated file in bytes.
        extents: iterable of (offset, length) pairs; the bytes of each
            extent are read from `in_fp` in this order.
        in_fp: readable binary file object positioned at the first extent's
            data.
        bufsize: maximum number of bytes per read from `in_fp`.

    Raises:
        EOFError: `in_fp` ran out of data before an extent was complete.

    On return `out_fp` is positioned just past the last extent written
    (offset 0 when there were no extents).
    """
    size = int(logical_size)
    out_fp.seek(0)
    out_fp.truncate(size)

    for off, length in extents:
        out_fp.seek(int(off), os.SEEK_SET)
        remaining = int(length)
        while remaining:
            chunk = in_fp.read(min(bufsize, remaining))
            if not chunk:
                raise EOFError("Archive ended while reading sparse extent data")
            out_fp.write(chunk)
            remaining -= len(chunk)

    # truncate() extends real files, but in-memory objects such as
    # io.BytesIO only ever shrink. Without this a trailing hole would be
    # lost and the result would be shorter than logical_size.
    resume_at = out_fp.tell()
    out_fp.seek(0, os.SEEK_END)
    if out_fp.tell() < size:
        # Writing the final byte makes the object zero-fill up to size.
        out_fp.seek(size - 1, os.SEEK_SET)
        out_fp.write(b"\0")
    out_fp.seek(resume_at, os.SEEK_SET)
2562+
2563+
def unpack_sparse_to_path(in_fp, out_path, logical_size, extents, bufsize=1024*1024):
    """
    Recreate a sparse file at `out_path` from extent data read from `in_fp`.

    The parent directory of `out_path` is created if missing, the file is
    opened for binary writing (replacing any existing content) and filled
    by write_sparse_to_fileobj(). Afterwards the data is flushed and
    fsync'ed on a best-effort basis; a failure there is ignored.
    """
    parent_dir = os.path.dirname(out_path)
    if not parent_dir:
        # A bare file name lives in the current directory.
        parent_dir = "."
    os.makedirs(parent_dir, exist_ok=True)

    with open(out_path, "wb") as target:
        write_sparse_to_fileobj(target, logical_size, extents, in_fp, bufsize)

        # Durability is a nicety here, never a reason to fail the unpack.
        try:
            target.flush()
            os.fsync(target.fileno())
        except Exception:
            pass
24592574

24602575
def _is_valid_zlib_header(cmf, flg):
24612576
"""
@@ -5996,7 +6111,8 @@ def AppendFilesWithContentToList(infiles, dirlistfromtxt=False, extradata=[], js
59966111
# Types that should be considered zero-length in the archive context:
59976112
zero_length_types = {1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13}
59986113
# Types that have actual data to read:
5999-
data_types = {0, 7, 12}
6114+
data_types = {0, 7}
6115+
sparse_types = {12}
60006116
if ftype in zero_length_types:
60016117
fsize = format(int("0"), 'x').lower()
60026118
elif ftype in data_types:
@@ -6313,7 +6429,8 @@ def AppendFilesWithContentFromTarFileToList(infile, extradata=[], jsondata={}, c
63136429
# Types that should be considered zero-length in the archive context:
63146430
zero_length_types = {1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13}
63156431
# Types that have actual data to read:
6316-
data_types = {0, 7, 12}
6432+
data_types = {0, 7}
6433+
sparse_types = {12}
63176434
if ftype in zero_length_types:
63186435
fsize = format(int("0"), 'x').lower()
63196436
elif ftype in data_types:

0 commit comments

Comments
 (0)