Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Breaking Change: don't try to normalize unicode in collision testing. #25

Merged
merged 2 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ZipArchives"
uuid = "49080126-0e18-4c2a-b176-c102e4b3760c"
authors = ["nhz2 <[email protected]>"]
version = "0.4.6"
version = "0.5.0"

[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
Expand Down
66 changes: 56 additions & 10 deletions src/filename-checks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ function basic_name_check(name::String)::Nothing
@argcheck !isempty(name)
@argcheck isvalid(name)
@argcheck !startswith(name, "/")
@argcheck !contains(name, "//")
@argcheck !contains(name, '\0')
@argcheck !contains(name, '\\')
@argcheck !contains(name, ':')
Expand All @@ -27,13 +28,58 @@ function basic_name_check(name::String)::Nothing
end
end

function norm_name(name::AbstractString)::String
lowercase(join(
split(
name,
'/';
keepempty=false
),
'/'
))
end
# The zip format seems to allow an empty base file name at the top level.
# Other base file name cannot be empty.
# Any directory name can be just a "/".
# For example "a///b.txt" is the following path ("a/", "/", "/", "b.txt")
# But there is no path for a file ("a/", ""),
# because the path "a/" is an empty directory
# or a place to store metadata about "a/"
# Then on extract on windows the empty "/" turn into "_".
# that is why I check !contains(name, "//")
# Any entry name that ends in a "/" is always interpreted as
# a directory entry.
# used_stripped_dir_names are the path to all directories with the trailing "/" removed
function check_name_used(name::String, used_names::Set{String}, used_stripped_dir_names::Set{String})::Nothing
# basic_name_check(name)
data = codeunits(name)
@argcheck name ∉ used_names
if !endswith(name, '/')
@argcheck name ∉ used_stripped_dir_names
end
# also any of the parent directories implied by name must not be files.
pos::Int = 1
while true
i = findnext('/', name, pos)
isnothing(i) && break
parent_data = @view(data[begin:i-1])
# need to rstrip because of repeated '/'
if isempty(parent_data) || parent_data[end] != UInt8('/')
# this effectively rstrips all '/'
parent_name = String(parent_data)
# @show parent_name
@argcheck parent_name ∉ used_names
end
pos = i+1
end
end

function add_name_used!(name::String, used_names::Set{String}, used_stripped_dir_names::Set{String})::Nothing
data = codeunits(name)
push!(used_names, name)
# also any of the parent directories implied by name must added to used_stripped_dir_names.
pos::Int = 1
while true
i = findnext('/', name, pos)
isnothing(i) && break
parent_data = @view(data[begin:i-1])
# need to rstrip because of repeated '/'
if isempty(parent_data) || parent_data[end] != UInt8('/')
# this effectively rstrips all '/'
parent_name = String(parent_data)
# @show parent_name
push!(used_stripped_dir_names, parent_name)
end
pos = i+1
end
end
17 changes: 14 additions & 3 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ end

Base.@kwdef mutable struct PartialEntry
name::String
"lowercase normalized name used to check for name collisions"
normed_name::Union{Nothing, String}
comment::String = ""
external_attrs::UInt32 = UInt32(0o0100644)<<16 # external file attributes: https://unix.stackexchange.com/questions/14705/the-zip-formats-external-file-attribute
method::UInt16 = Store # compression method
Expand All @@ -96,7 +94,19 @@ mutable struct ZipWriter{S<:IO} <: IO
partial_entry::Union{Nothing, PartialEntry}
closed::Bool
force_zip64::Bool
used_names_lower::Set{String}

"If checking names, this contains all committed entry names, unmodified."
used_names::Set{String}

"""
If checking names, this contains all implicit and explicit directory paths with all trailing '/'
removed. for example, the "/a.txt" entry name creates an explicit directory "/"
Which will be stored here as ""

This is because in zip land there is no concept of "root",
and directories are allowed to be empty strings.
"""
used_stripped_dir_names::Set{String}
check_names::Bool
transcoder::Union{Nothing, NoopStream{S}, DeflateCompressorStream{S}}
function ZipWriter(io::IO;
Expand All @@ -113,6 +123,7 @@ mutable struct ZipWriter{S<:IO} <: IO
false,
force_zip64,
Set{String}(),
Set{String}(),
check_names,
nothing,
)
Expand Down
29 changes: 11 additions & 18 deletions src/writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ function zip_append_archive(io::IO; trunc_footer=true, zip_kwargs=(;))::ZipWrite
w.entries = entries
w.central_dir_buffer = central_dir_buffer
if w.check_names
w.used_names_lower = Set{String}(norm_name(String(view(central_dir_buffer, e.name_range))) for e in entries)
for e in entries
add_name_used!(String(view(central_dir_buffer, e.name_range)), w.used_names, w.used_stripped_dir_names)
end
end
w
catch # close io if there is an error parsing entries
Expand Down Expand Up @@ -182,18 +184,15 @@ function zip_newfile(w::ZipWriter, name::AbstractString;
namestr::String = String(name)
@argcheck ncodeunits(namestr) ≤ typemax(UInt16)
@argcheck ncodeunits(comment) ≤ typemax(UInt16)
normed_name = nothing
if w.check_names
basic_name_check(namestr)
@argcheck !isnothing(external_attrs) || !endswith(namestr, "/")
normed_name = norm_name(namestr)
@argcheck normed_name ∉ w.used_names_lower
check_name_used(namestr, w.used_names, w.used_stripped_dir_names)
end
@assert !iswritable(w)
io = w._io
pe = PartialEntry(;
name=namestr,
normed_name,
comment,
w.force_zip64,
offset=0, # place holder offset
Expand Down Expand Up @@ -337,7 +336,7 @@ function zip_commitfile(w::ZipWriter)
end
entry = append_entry!(w.central_dir_buffer, pe)
if w.check_names
push!(w.used_names_lower, something(pe.normed_name))
add_name_used!(pe.name, w.used_names, w.used_stripped_dir_names)
end
push!(w.entries, entry)
end
Expand Down Expand Up @@ -394,19 +393,16 @@ function zip_writefile(w::ZipWriter, name::AbstractString, data::AbstractVector{
namestr::String = String(name)
@argcheck ncodeunits(namestr) ≤ typemax(UInt16)
@argcheck ncodeunits(comment) ≤ typemax(UInt16)
normed_name=nothing
if w.check_names
basic_name_check(namestr)
@argcheck !isnothing(external_attrs) || !endswith(namestr, "/")
normed_name = norm_name(namestr)
@argcheck normed_name ∉ w.used_names_lower
check_name_used(namestr, w.used_names, w.used_stripped_dir_names)
end
@assert !iswritable(w)
io = w._io
crc32 = zip_crc32(data)
pe = PartialEntry(;
name=namestr,
normed_name,
comment,
offset=0,# place holder offset
w.force_zip64,
Expand All @@ -429,7 +425,7 @@ function zip_writefile(w::ZipWriter, name::AbstractString, data::AbstractVector{
@assert !iswritable(w)
entry = append_entry!(w.central_dir_buffer, pe)
if w.check_names
push!(w.used_names_lower, something(pe.normed_name))
add_name_used!(namestr, w.used_names, w.used_stripped_dir_names)
end
push!(w.entries, entry)
nothing
Expand All @@ -438,19 +434,16 @@ end
"""
zip_name_collision(w::ZipWriter, new_name::AbstractString)::Bool

Return true if `new_name` matches an existing committed entry.

The check is case insensitive, and
insensitive to leading, trailing, and repeated `/`.
Return true if `new_name` exactly matches an existing committed entry name.
"""
zip_name_collision(w::ZipWriter, new_name::AbstractString)::Bool = zip_name_collision(w, String(new_name))
function zip_name_collision(w::ZipWriter, new_name::String)::Bool
if w.check_names
norm_name(new_name) ∈ w.used_names_lower
new_name ∈ w.used_names
else
nname = norm_name(new_name)
data = codeunits(new_name)
any(eachindex(w.entries)) do i
nname == norm_name(String(_name_view(w, i)))
data == _name_view(w, i)
end
end
end
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Random.seed!(1234)

# @test Any[] == detect_ambiguities(Base, Core, ZipArchives)
include("test_simple-usage.jl")
include("test_filename-checks.jl")
include("test_show.jl")
include("test_writer.jl")
include("test_reader.jl")
Expand Down
38 changes: 38 additions & 0 deletions test/test_filename-checks.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using ZipArchives
using Test
using Random

@testset "check_name_used unit tests" begin
# make sure check_name_used doesn't error with random input
for i in 1:100000
name = String(rand(UInt8, rand(1:10)))
ZipArchives.check_name_used(name, Set{String}(), Set{String}())
end
used_names = Set{String}()
used_dirs = Set{String}()
for i in 1:10000
name = String(rand(UInt8, rand(1:10)))
ZipArchives.add_name_used!(name, used_names, used_dirs)
end
for i in 1:1000
name = String(rand(UInt8, rand(1:10)))
used_names = Set{String}()
used_dirs = Set{String}()
ZipArchives.add_name_used!(name, used_names, used_dirs)
@test_throws ArgumentError ZipArchives.check_name_used(name, used_names, used_dirs)
if !endswith(name, "/")
@test_throws ArgumentError ZipArchives.check_name_used(name*"/", used_names, used_dirs)
end
end

used_names = Set{String}()
used_dirs = Set{String}()
ZipArchives.add_name_used!("", used_names, used_dirs)
@test_throws ArgumentError ZipArchives.check_name_used("/a.txt", used_names, used_dirs)
ZipArchives.check_name_used("a.txt", used_names, used_dirs)
ZipArchives.add_name_used!("a/b/c.txt", used_names, used_dirs)
@test_throws ArgumentError ZipArchives.check_name_used("a/b", used_names, used_dirs)
ZipArchives.check_name_used("a/b/", used_names, used_dirs)
ZipArchives.add_name_used!("a//c.txt", used_names, used_dirs)
ZipArchives.check_name_used("a/c.txt", used_names, used_dirs)
end
8 changes: 4 additions & 4 deletions test/test_show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@ using Test
io = IOBuffer()
ZipWriter(io) do w
zip_writefile(w, "test", b"data")
zip_writefile(w, "test/foo", b"data")
zip_writefile(w, "testdir/foo", b"data")
end
data = take!(io)
r = ZipBufferReader(data)
@test repr(r) == "ZipArchives.ZipBufferReader($(data))"
@test sprint(io->(show(io, MIME"text/plain"(), r))) == """
246 byte, 2 entry ZipBufferReader{Vector{UInt8}}
252 byte, 2 entry ZipBufferReader{Vector{UInt8}}
total uncompressed size: 8 bytes
"test"
\"test/\""""
\"testdir/\""""
@test sprint(io->(show(IOContext(io, :displaysize => (3, 80)), MIME"text/plain"(), r))) == """
246 byte, 2 entry ZipBufferReader{Vector{UInt8}}
252 byte, 2 entry ZipBufferReader{Vector{UInt8}}
total uncompressed size: 8 bytes
⋮"""
end
Expand Down
13 changes: 7 additions & 6 deletions test/test_writer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,12 @@ ZipWriter(joinpath(tmp, "utf8.zip")) do w
@test !zip_name_collision(w, "test2")
@test !zip_name_collision(w, SubString("test2", 1:2))
@test !zip_name_collision(w, "test2.txt/")
@test !zip_name_collision(w, "test2.txt")
zip_commitfile(w)
@test !zip_name_collision(w, "test2")
@test zip_name_collision(w, "test2.txt/")
@test !zip_name_collision(w, "test2.txt/")
@test zip_name_collision(w, "test2.txt")
@test zip_name_collision(w, "Test2.txt")
@test !zip_name_collision(w, "Test2.txt")
@test zip_name_collision(w, "🐨.txt")
end

Expand Down Expand Up @@ -170,11 +171,11 @@ end
io = IOBuffer()
w = ZipWriter(io; check_names=false)
zip_mkdir(w, "empty_dir")
@test zip_name_collision(w, "empty_dir")
@test !zip_name_collision(w, "empty_dir")
@test zip_name_collision(w, "empty_dir/")
@test zip_name_collision(w, "empty_dir//")
@test zip_name_collision(w, "/empty_dir")
@test zip_name_collision(w, "/empty_Dir/")
@test !zip_name_collision(w, "empty_dir//")
@test !zip_name_collision(w, "/empty_dir")
@test !zip_name_collision(w, "/empty_Dir/")
@test !zip_name_collision(w, "foo/empty_dir")
# Adding symlinks requires check_names=false
ZipArchives.zip_symlink(w, "this is a invalid target", "symlink_entry")
Expand Down
Loading