From 3c14155089a6f7025825d9ffa5ee6d6886eedae3 Mon Sep 17 00:00:00 2001 From: nhz2 Date: Sun, 9 Feb 2025 00:34:56 -0500 Subject: [PATCH 1/2] Reuse last used compressor --- src/constants.jl | 1 - src/types.jl | 4 ++++ src/writer.jl | 35 +++++++++++++++-------------------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/constants.jl b/src/constants.jl index 3091924..8010fc3 100644 --- a/src/constants.jl +++ b/src/constants.jl @@ -11,7 +11,6 @@ const Deflate64 = UInt16(9) see https://github.com/madler/zipflow/blob/2bef2123ebe519c17b18d2d0c3c71065088de952/zipflow.c#L214 =# function deflate_level_bits(level::Int)::UInt16 - @argcheck level ∈ (-1:9) if level == 9 0b010 # Maximum elseif level == 2 diff --git a/src/types.jl b/src/types.jl index d5ee87d..c43cdb1 100644 --- a/src/types.jl +++ b/src/types.jl @@ -79,6 +79,9 @@ mutable struct ZipWriter{S<:IO} <: IO used_stripped_dir_names::Set{String} check_names::Bool transcoder::Union{Nothing, NoopStream{WriteOffsetTracker{S}}, DeflateCompressorStream{WriteOffsetTracker{S}}} + + "Cached codec and compression level to avoid allocations" + compressor_cache::Union{Nothing, Tuple{DeflateCompressor, Int}} function ZipWriter(io::IO; check_names::Bool=true, own_io::Bool=false, @@ -97,6 +100,7 @@ mutable struct ZipWriter{S<:IO} <: IO Set{String}(), check_names, nothing, + nothing, ) end end \ No newline at end of file diff --git a/src/writer.jl b/src/writer.jl index 1b6ab75..61e75c9 100644 --- a/src/writer.jl +++ b/src/writer.jl @@ -208,14 +208,18 @@ function zip_newfile(w::ZipWriter, name::AbstractString; else Store end - codec, level_bits = if real_compression_method==Store + codec, level_bits = if real_compression_method == Store (Noop(), UInt16(0)) - elseif real_compression_method==Deflate + elseif real_compression_method == Deflate @argcheck compression_level ∈ (-1:9) - ( - DeflateCompressor(;level = compression_level), - deflate_level_bits(compression_level), - ) + old_compressor_cache = w.compressor_cache + 
if isnothing(old_compressor_cache) || old_compressor_cache[2] != compression_level + deflate_codec = DeflateCompressor(;level = compression_level) + w.compressor_cache = (deflate_codec, compression_level) + else + deflate_codec = something(old_compressor_cache)[1] + end + (deflate_codec, deflate_level_bits(compression_level)) else throw(ArgumentError("compression_method must be Deflate or Store")) end @@ -330,14 +334,9 @@ function zip_commitfile(w::ZipWriter) # If some error happens, the file will be partially written, # but not included in the central directory. # Finish the compressing here, but don't close underlying IO. - try - write(transcoder, TranscodingStreams.TOKEN_END) - # early exit incase io is broken - w._io.bad && throw_bad_io() - finally - # Prevent memory leak maybe. - close(transcoder) - end + write(transcoder, TranscodingStreams.TOKEN_END) + # early exit in case io is broken + w._io.bad && throw_bad_io() cur_offset = w._io.offset pe.compressed_size = cur_offset - pe.offset - pe.local_header_size @@ -382,12 +381,7 @@ function zip_abortfile(w::ZipWriter) w.transcoder = nothing w.partial_entry = nothing # Finish the compressing here, but don't close underlying IO. - try - write(transcoder, TranscodingStreams.TOKEN_END) - finally - # Prevent memory leak maybe. 
- close(transcoder) - end + write(transcoder, TranscodingStreams.TOKEN_END) end nothing end @@ -518,6 +512,7 @@ function Base.close(w::ZipWriter) zip_commitfile(w) finally w.partial_entry = nothing + w.compressor_cache = nothing try write_footer(w._io, w.entries, w.central_dir_buffer; w.force_zip64) finally From 8125e0f04df2ad4358d15c547bd203cf8689ab8a Mon Sep 17 00:00:00 2001 From: nhz2 Date: Sun, 9 Feb 2025 12:20:26 -0500 Subject: [PATCH 2/2] remove unneeded close method and update ZipStreams test version --- src/writer.jl | 1 - test/test_writer.jl | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/writer.jl b/src/writer.jl index 61e75c9..f473450 100644 --- a/src/writer.jl +++ b/src/writer.jl @@ -277,7 +277,6 @@ Base.write(w::ZipWriter, x::UInt8) = write(w, Ref(x)) # WriteOffsetTracker Base.isopen(w::WriteOffsetTracker) = !w.bad -Base.close(w::WriteOffsetTracker) = nothing # this protects the underlying stream from being closed by TranscodingStreams Base.isreadable(w::WriteOffsetTracker) = false Base.write(w::WriteOffsetTracker, x::UInt8) = write(w, Ref(x)) diff --git a/test/test_writer.jl b/test/test_writer.jl index 8fea46e..3bbc61b 100644 --- a/test/test_writer.jl +++ b/test/test_writer.jl @@ -123,14 +123,14 @@ include("external_unzippers.jl") end end -if VERSION ≥ v"1.7.0" # ZipStreams requires julia 1.7 +if VERSION ≥ v"1.11.0" # ZipStreams requires julia 1.11 @testset "Writer compat with ZipStreams" begin # setup test env for ZipStreams worker = Malt.Worker() Malt.remote_eval_fetch(worker, quote import Pkg Pkg.activate(;temp=true) - Pkg.add(name="ZipStreams", version="2.2.0") + Pkg.add(name="ZipStreams", version="3.0.0") import ZipStreams nothing end) @@ -140,7 +140,7 @@ if VERSION ≥ v"1.7.0" # ZipStreams requires julia 1.7 dir = ZipReader(read(zippath)) Malt.remote_eval_fetch(worker, quote ZipStreams.zipsource($(zippath)) do zs - ZipStreams.validate(zs) + ZipStreams.is_valid!(zs) || error("archive not valid") end nothing end) @@ 
-151,7 +151,7 @@ if VERSION ≥ v"1.7.0" # ZipStreams requires julia 1.7 for i in 1:zip_nentries(dir) name, data = Malt.remote_eval_fetch(worker, quote f = ZipStreams.next_file(zs) - (f.info.name, read(f,String)) + (ZipStreams.info(f).name, read(f,String)) end) @test zip_readentry(dir, name, String) == data end