Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 92 additions & 16 deletions src/hex_erl_tar.erl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
%% 3. -doc and -moduledoc attributes removed for OTP 24 compatibility
%% 4. safe_link_name/2 fixed to validate symlink targets relative to symlink's
%% parent directory instead of in isolation
%% 5. When extracting to disk (cwd option), stream file entries in chunks
%% instead of loading them fully into memory
%% 6. Default chunk_size to 65536 in add_opts instead of 0 with special case
%% 7. Use compressed instead of compressed_one for file:open for OTP 24 compat
%%
%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f
%%
Expand Down Expand Up @@ -147,12 +151,18 @@ extract1(eof, Reader, _, Acc) ->
extract1(#tar_header{name=Name,size=Size}=Header, Reader0, Opts, Acc0) ->
case check_extract(Name, Opts) of
true ->
case do_read(Reader0, Size) of
{ok, Bin, Reader1} ->
Acc = extract2(Header, Bin, Opts, Acc0),
{ok, Acc, Reader1};
{error, _} = Err ->
throw(Err)
case Opts#read_opts.output of
memory ->
case do_read(Reader0, Size) of
{ok, Bin, Reader1} ->
Acc = extract2(Header, Bin, Opts, Acc0),
{ok, Acc, Reader1};
{error, _} = Err ->
throw(Err)
end;
file ->
Reader1 = extract_to_file(Header, Reader0, Opts),
{ok, Acc0, Reader1}
end;
false ->
{ok, Acc0, skip_file(Reader0)}
Expand All @@ -173,6 +183,79 @@ extract2(Header, Bin, Opts, Acc) ->
throw(Err)
end.

extract_to_file(#tar_header{name=Name0}=Header, Reader0, Opts) ->
case typeflag(Header#tar_header.typeflag) of
regular ->
Name1 = make_safe_path(Name0, Opts),
case stream_to_file(Name1, Reader0, Opts) of
{ok, Reader1} ->
read_verbose(Opts, "x ~ts~n", [Name0]),
_ = set_extracted_file_info(Name1, Header),
Reader1;
{error, _} = Err ->
throw(Err)
end;
_ ->
Reader1 = skip_file(Reader0),
_ = write_extracted_element(Header, <<>>, Opts),
Reader1
end.

stream_to_file(Name, Reader0, Opts) ->
Write =
case Opts#read_opts.keep_old_files of
true ->
case file:read_file_info(Name) of
{ok, _} -> false;
_ -> true
end;
false -> true
end,
case Write of
true ->
ChunkSize = Opts#read_opts.chunk_size,
case open_output_file(Name) of
{ok, Fd} ->
try
stream_to_file_loop(Fd, Reader0, ChunkSize)
after
file:close(Fd)
end;
{error, _} = Err ->
Err
end;
false ->
{ok, skip_file(Reader0)}
end.

open_output_file(Name) ->
case file:open(Name, [write, raw, binary]) of
{ok, _} = Ok ->
Ok;
{error, enoent} ->
ok = make_dirs(Name, file),
file:open(Name, [write, raw, binary]);
{error, _} = Err ->
Err
end.

stream_to_file_loop(_Fd, #reg_file_reader{num_bytes=0}=Reader, _ChunkSize) ->
{ok, Reader};
stream_to_file_loop(_Fd, #sparse_file_reader{num_bytes=0}=Reader, _ChunkSize) ->
{ok, Reader};
stream_to_file_loop(Fd, Reader, ChunkSize) ->
case do_read(Reader, ChunkSize) of
{ok, Bin, Reader1} ->
case file:write(Fd, Bin) of
ok ->
stream_to_file_loop(Fd, Reader1, ChunkSize);
{error, _} = Err ->
Err
end;
{error, _} = Err ->
Err
end.

%% Checks if the file Name should be extracted.
check_extract(_, #read_opts{files=all}) ->
true;
Expand Down Expand Up @@ -382,13 +465,7 @@ open1({file, Fd}=Handle, read, [raw], Opts) ->
end;
open1({file, _Fd}=Handle, read, [], _Opts) ->
{error, {Handle, {incompatible_option, cooked}}};
open1(Name, Access, Raw, Opts0) when is_list(Name); is_binary(Name) ->
Opts = case lists:member(compressed, Opts0) andalso Access == read of
true ->
[compressed_one | (Opts0 -- [compressed])];
false ->
Opts0
end,
open1(Name, Access, Raw, Opts) when is_list(Name); is_binary(Name) ->
case file:open(Name, Raw ++ [binary, Access|Opts]) of
{ok, File} ->
{ok, #reader{handle=File,access=Access,func=fun file_op/2}};
Expand Down Expand Up @@ -1833,9 +1910,6 @@ do_write(#reader{handle=Handle,func=Fun}=Reader0, Data)
Err
end.

do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=0}=Opts)
when is_function(Fun, 2) ->
do_copy(Reader, Source, Opts#add_opts{chunk_size=65536});
do_copy(#reader{func=Fun}=Reader, Source, #add_opts{chunk_size=ChunkSize})
when is_function(Fun, 2) ->
case file:open(Source, [read, binary]) of
Expand Down Expand Up @@ -2009,6 +2083,8 @@ extract_opts([cooked|Rest], Opts=#read_opts{open_mode=OpenMode}) ->
extract_opts(Rest, Opts#read_opts{open_mode=[cooked|OpenMode]});
extract_opts([verbose|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{verbose=true});
extract_opts([{chunks,N}|Rest], Opts) ->
extract_opts(Rest, Opts#read_opts{chunk_size=N});
extract_opts([Other|Rest], Opts) ->
extract_opts(Rest, read_opts([Other], Opts));
extract_opts([], Opts) ->
Expand Down
11 changes: 8 additions & 3 deletions src/hex_erl_tar.hrl
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
%% This file is a copy of erl_tar.hrl from OTP with no modifications.
%% This file is a copy of erl_tar.hrl from OTP with the following modifications:
%% 1. Added chunk_size field to #read_opts{} for streaming extraction to disk
%% 2. Added {chunks, pos_integer()} to extract_opt() type
%% 3. Default chunk_size to 65536 in #add_opts{} instead of 0
%%
%% OTP commit: 013041bd68c2547848e88963739edea7f0a1a90f
%%
Expand All @@ -25,7 +28,7 @@
%% Options used when adding files to a tar archive.
-record(add_opts, {
read_info, %% Fun to use for read file/link info.
chunk_size = 0, %% For file reading when sending to sftp. 0=do not chunk
chunk_size = 65536, %% Chunk size for reading files.
verbose = false, %% Verbose on/off.
atime = undefined,
mtime = undefined,
Expand All @@ -42,7 +45,8 @@
files = all, %% Set of files to extract (or all)
output = file :: 'file' | 'memory',
open_mode = [], %% Open mode options.
verbose = false :: boolean()}). %% Verbose on/off.
verbose = false :: boolean(), %% Verbose on/off.
chunk_size = 65536}). %% Chunk size for streaming to disk.
-type read_opts() :: #read_opts{}.

-type add_opt() :: dereference |
Expand All @@ -59,6 +63,7 @@

-type extract_opt() :: {cwd, string()} |
{files, [name_in_archive()]} |
{chunks, pos_integer()} |
compressed |
cooked |
memory |
Expand Down
Loading