Skip to content

Commit a56fccb

Browse files
authored
[ChunkCodecCore] BREAKING change the return type to MaybeSize (#72)
* return MaybeSize
* Change return type
* Fix for 1.12 and add more tests
* test MaybeSize convert code
* remove convert with `nothing` and revert change to `try_find_decoded_size`
* improve error messages
* refactor asserts
* update changelogs
* make JET happy
1 parent 87f7447 commit a56fccb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+386
-225
lines changed

Bitshuffle/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

77
## Unreleased
88

9+
- Update to `ChunkCodecCore` 0.6 [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72)
10+
911
## [v0.1.1](https://github.com/JuliaIO/ChunkCodecs.jl/tree/Bitshuffle-v0.1.1) - 2025-08-09
1012

1113
### Added

Bitshuffle/Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
name = "ChunkCodecBitshuffle"
22
uuid = "1d859bbf-6282-4c80-a370-34c59bf7ec11"
33
authors = ["nhz2 <nhz2@cornell.edu>"]
4-
version = "0.1.1"
4+
version = "0.2.0-dev"
55

66
[deps]
77
ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"
88

99
[compat]
10-
ChunkCodecCore = "0.5.1"
10+
ChunkCodecCore = "0.6"
1111
julia = "1.6"
1212

1313
[workspace]

Bitshuffle/src/ChunkCodecBitshuffle.jl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ using ChunkCodecCore:
88
DecodeOptions,
99
check_in_range,
1010
check_contiguous,
11-
DecodingError
11+
DecodingError,
12+
MaybeSize,
13+
NOT_SIZE,
14+
is_size
1215
import ChunkCodecCore:
1316
decode_options,
1417
try_decode!,
@@ -215,17 +218,17 @@ decoded_size_range(e::BShufCodec) = Int64(0):e.element_size:typemax(Int64)-1
215218

216219
encode_bound(::BShufCodec, src_size::Int64)::Int64 = src_size
217220

218-
function try_encode!(e::BShufCodec, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
221+
function try_encode!(e::BShufCodec, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
219222
dst_size::Int64 = length(dst)
220223
src_size::Int64 = length(src)
221224
element_size = e.element_size
222225
block_size = e.block_size
223226
check_in_range(decoded_size_range(e); src_size)
224227
if dst_size < src_size
225-
nothing
228+
NOT_SIZE
226229
else
227230
apply_blocks!(trans_bit_elem!, src, dst, element_size, block_size)
228-
return src_size
231+
src_size
229232
end
230233
end
231234

@@ -255,7 +258,7 @@ decoded_size_range(x::BShufEncodeOptions) = decoded_size_range(x.codec)
255258

256259
encode_bound(x::BShufEncodeOptions, src_size::Int64)::Int64 = encode_bound(x.codec, src_size)
257260

258-
function try_encode!(x::BShufEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
261+
function try_encode!(x::BShufEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
259262
try_encode!(x.codec, dst, src)
260263
end
261264

@@ -285,7 +288,7 @@ function try_find_decoded_size(::BShufDecodeOptions, src::AbstractVector{UInt8})
285288
length(src)
286289
end
287290

288-
function try_decode!(d::BShufDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
291+
function try_decode!(d::BShufDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
289292
dst_size::Int64 = length(dst)
290293
src_size::Int64 = length(src)
291294
element_size = d.codec.element_size
@@ -295,10 +298,10 @@ function try_decode!(d::BShufDecodeOptions, dst::AbstractVector{UInt8}, src::Abs
295298
throw(BShufDecodingError("src_size isn't a multiple of element_size"))
296299
end
297300
if dst_size < src_size
298-
nothing
301+
NOT_SIZE
299302
else
300303
apply_blocks!(untrans_bit_elem!, src, dst, element_size, block_size)
301-
return src_size
304+
src_size
302305
end
303306
end
304307

Bitshuffle/src/compress.jl

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -120,14 +120,14 @@ function encode_bound(e::BShufLZEncodeOptions, src_size::Int64)::Int64
120120
bound
121121
end
122122

123-
function try_encode!(e::BShufLZEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
123+
function try_encode!(e::BShufLZEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
124124
check_contiguous(dst)
125125
check_contiguous(src)
126126
src_size::Int64 = length(src)
127127
dst_size::Int64 = length(dst)
128128
check_in_range(decoded_size_range(e); src_size)
129129
if dst_size < 12
130-
return nothing
130+
return NOT_SIZE
131131
end
132132
elem_size = e.codec.element_size
133133
# This gets used to write to the header
@@ -155,30 +155,30 @@ function try_encode!(e::BShufLZEncodeOptions, dst::AbstractVector{UInt8}, src::A
155155
# The leftover bytes are copied at the end if needed.
156156
while src_left ≥ BLOCKED_MULT*elem_size
157157
if dst_left < 4
158-
return nothing # no space for block header
158+
return NOT_SIZE # no space for block header
159159
end
160160
if src_left < block_size*elem_size
161161
block_size = fld(src_left, BLOCKED_MULT*elem_size) * BLOCKED_MULT
162162
end
163163
src_offset = src_size - src_left
164164
trans_bit_elem!(tmp_buf_bshuf, Int64(0), src, src_offset, elem_size, block_size)
165-
compressed_nbytes = try_encode!(
165+
maybe_compressed_nbytes = try_encode!(
166166
e.options,
167167
@view(dst[end-dst_left+1+4:end]),
168168
@view(tmp_buf_bshuf[begin:begin+elem_size*block_size-1])
169-
)
170-
if isnothing(compressed_nbytes)
171-
return nothing # no space for compressed block
169+
)::MaybeSize
170+
if !is_size(maybe_compressed_nbytes)
171+
return NOT_SIZE # no space for compressed block
172172
end
173-
@assert !signbit(compressed_nbytes)
173+
compressed_nbytes = Int64(maybe_compressed_nbytes)
174174
store_int32_BE!(dst, dst_size - dst_left, Int32(compressed_nbytes))
175175
src_left -= block_size*elem_size
176176
dst_left -= 4 + compressed_nbytes
177177
@assert dst_left ∈ 0:dst_size
178178
@assert src_left ∈ 0:src_size
179179
end
180180
if src_left > dst_left
181-
return nothing # no space for leftover bytes
181+
return NOT_SIZE # no space for leftover bytes
182182
end
183183
src_offset = src_size - src_left
184184
dst_offset = dst_size - dst_left
@@ -242,14 +242,14 @@ function try_find_decoded_size(d::BShufLZDecodeOptions, src::AbstractVector{UInt
242242
end
243243
end
244244

245-
function try_decode!(d::BShufLZDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
245+
function try_decode!(d::BShufLZDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
246246
check_contiguous(dst)
247247
check_contiguous(src)
248-
decoded_size = try_find_decoded_size(d, src)
248+
decoded_size::Int64 = try_find_decoded_size(d, src)
249249
src_size::Int64 = length(src)
250250
dst_size::Int64 = length(dst)
251251
if decoded_size > dst_size
252-
return nothing
252+
return NOT_SIZE
253253
end
254254
src_left::Int64 = src_size
255255
dst_left::Int64 = decoded_size
@@ -294,9 +294,9 @@ function try_decode!(d::BShufLZDecodeOptions, dst::AbstractVector{UInt8}, src::A
294294
d.options,
295295
@view(tmp_buf_decode[begin:begin+block_size*elem_size-1]),
296296
@view(src[end-src_left+1:end-src_left+c_size])
297-
)
297+
)::MaybeSize
298298
src_left -= c_size
299-
if ret != block_size*elem_size
299+
if ret.val != block_size*elem_size
300300
throw(BShufDecodingError("saved decoded size is not correct"))
301301
end
302302
untrans_bit_elem!(dst, dst_offset, tmp_buf_decode, Int64(0), elem_size, block_size)

Bitshuffle/test/runtests.jl

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ using ChunkCodecCore:
1818
DecodeOptions,
1919
NoopCodec,
2020
try_find_decoded_size,
21-
try_encode!
21+
try_encode!,
22+
MaybeSize,
23+
is_size
2224
using ChunkCodecTests: test_codec, test_encoder_decoder
2325
using ChunkCodecLibLz4
2426
using ChunkCodecLibZstd
@@ -75,7 +77,7 @@ function TestNoopEncodeOptions(;
7577
end
7678
ChunkCodecCore.encode_bound(::TestNoopEncodeOptions, src_size::Int64)::Int64 = src_size
7779
ChunkCodecCore.decoded_size_range(e::TestNoopEncodeOptions) = Int64(8):e.element_size:typemax(Int64)-Int64(1)
78-
function ChunkCodecCore.try_encode!(e::TestNoopEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::Union{Nothing, Int64}
80+
function ChunkCodecCore.try_encode!(e::TestNoopEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize
7981
dst_size::Int64 = length(dst)
8082
src_size::Int64 = length(src)
8183
check_in_range(decoded_size_range(e); src_size)
@@ -324,22 +326,22 @@ end
324326
c = encode(e, u)
325327
@test decode(d, c) == u
326328
for i in 1:length(c)
327-
@test isnothing(try_encode!(e, c[1:i-1], u))
329+
@test !is_size(try_encode!(e, c[1:i-1], u))
328330
end
329331
# zero length
330332
u = UInt8[]
331333
c = zeros(UInt8, 12)
332-
@test try_encode!(e, c, u) == length(c)
334+
@test try_encode!(e, c, u) == MaybeSize(length(c))
333335
@test decode(d, c) == u
334336
for i in 1:length(c)
335-
@test isnothing(try_encode!(e, c[1:i-1], u))
337+
@test !is_size(try_encode!(e, c[1:i-1], u))
336338
end
337339
# one length
338340
u = UInt8[0x00]
339341
c = zeros(UInt8, 12+1)
340-
@test try_encode!(e, c, u) == length(c)
342+
@test try_encode!(e, c, u) == MaybeSize(length(c))
341343
@test decode(d, c) == u
342344
for i in 1:length(c)
343-
@test isnothing(try_encode!(e, c[1:i-1], u))
345+
@test !is_size(try_encode!(e, c[1:i-1], u))
344346
end
345347
end

ChunkCodecCore/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

77
## Unreleased
88

9+
### BREAKING the return type of `try_encode!`, `try_decode!`, and `try_resize_decode!` changed to a new `MaybeSize` type [#72](https://github.com/JuliaIO/ChunkCodecs.jl/pull/72)
10+
911
## [v0.5.3](https://github.com/JuliaIO/ChunkCodecs.jl/tree/ChunkCodecCore-v0.5.3) - 2025-08-09
1012

1113
- Added support for Julia 1.6 [#68](https://github.com/JuliaIO/ChunkCodecs.jl/pull/68)

ChunkCodecCore/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "ChunkCodecCore"
22
uuid = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1"
33
authors = ["nhz2 <nhz2@cornell.edu>"]
4-
version = "0.5.3"
4+
version = "0.6.0-dev"
55

66
[compat]
77
julia = "1.6"

ChunkCodecCore/src/ChunkCodecCore.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ if VERSION >= v"1.11.0-DEV.469"
99
EncodeOptions,
1010
DecodeOptions,
1111
12+
MaybeSize,
13+
is_size,
14+
NOT_SIZE,
1215
DecodingError,
1316
DecodedSizeError,
1417
decode!,

ChunkCodecCore/src/errors.jl

Lines changed: 72 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,34 +5,91 @@ Generic error for data that cannot be decoded.
55
"""
66
abstract type DecodingError <: Exception end
77

8+
"""
9+
struct MaybeSize
10+
val::Int64
11+
end
12+
13+
If `val ≥ 0` it is a size, and can be converted back and forth with `Int64`.
14+
If `val < 0` converting to and from `Int64` will error.
15+
If `val == typemin(Int64)` it is an unknown size.
16+
Otherwise it is a size hint of `-val`.
17+
18+
"""
19+
struct MaybeSize
20+
val::Int64
21+
end
22+
23+
"""
24+
const NOT_SIZE = MaybeSize(typemin(Int64))
25+
"""
26+
const NOT_SIZE = MaybeSize(typemin(Int64))
27+
function is_size(x::MaybeSize)::Bool
28+
!signbit(x.val)
29+
end
30+
function Base.Int64(x::MaybeSize)
31+
if !is_size(x)
32+
throw(InexactError(:Int64, Int64, x))
33+
else
34+
x.val
35+
end
36+
end
37+
function Base.convert(::Type{Int64}, x::MaybeSize)
38+
Int64(x)
39+
end
40+
function Base.convert(::Type{MaybeSize}, x::Int64)::MaybeSize
41+
if signbit(x)
42+
throw(InexactError(:convert, MaybeSize, x))
43+
else
44+
MaybeSize(x)
45+
end
46+
end
47+
848
"""
949
struct DecodedSizeError <: Exception
1050
DecodedSizeError(max_size, decoded_size)
1151
12-
Unable to decode the data because the decoded size is larger than `max_size`
13-
or smaller than expected.
14-
If the decoded size is unknown `decoded_size` is `nothing`.
52+
Exception thrown when the decoded data size doesn't match expectations or exceeds limits.
53+
54+
# Fields
55+
- `max_size::Int64`: The maximum allowed or expected size in bytes
56+
- `decoded_size::MaybeSize`: The actual decoded size, size hint, or `NOT_SIZE` if unknown
57+
58+
This error can occur in several scenarios:
59+
1. Decoded size exceeds the maximum allowed size
60+
2. Decoded size is less than expected
61+
3. Decoder provides a size hint when the decoded size exceeds limits
62+
4. Decoded size is completely unknown but exceeds limits
1563
"""
1664
struct DecodedSizeError <: Exception
1765
max_size::Int64
18-
decoded_size::Union{Nothing, Int64}
66+
decoded_size::MaybeSize
1967
end
2068

2169
function Base.showerror(io::IO, err::DecodedSizeError)
2270
print(io, "DecodedSizeError: ")
23-
if isnothing(err.decoded_size)
24-
print(io, "decoded size is greater than max size: ")
71+
if err.decoded_size === NOT_SIZE
72+
print(io, "decoded size > ")
2573
print(io, err.max_size)
26-
elseif err.decoded_size < err.max_size
27-
print(io, "decoded size: ")
28-
print(io, err.decoded_size)
29-
print(io, " is less than expected size: ")
74+
elseif !is_size(err.decoded_size)
75+
suggested_size = -err.decoded_size.val
76+
print(io, "decoded size > ")
3077
print(io, err.max_size)
78+
print(io, ", try max_size = ")
79+
print(io, suggested_size)
3180
else
32-
print(io, "decoded size: ")
33-
print(io, err.decoded_size)
34-
print(io, " is greater than max size: ")
35-
print(io, err.max_size)
81+
decoded_size = err.decoded_size.val
82+
if decoded_size < err.max_size
83+
print(io, "decoded size ")
84+
print(io, decoded_size)
85+
print(io, " < expected ")
86+
print(io, err.max_size)
87+
else
88+
print(io, "decoded size ")
89+
print(io, decoded_size)
90+
print(io, " > ")
91+
print(io, err.max_size)
92+
end
3693
end
3794
nothing
38-
end
95+
end

0 commit comments

Comments
 (0)