Skip to content

Commit a7c4296

Browse files
committed
Implement takestring!
This function creates a string from an IOBuffer or a Vector, emptying the input object and reusing the memory where possible.
1 parent ead23db commit a7c4296

File tree

8 files changed

+144
-2
lines changed

8 files changed

+144
-2
lines changed

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ New library features
2929

3030
* `sort(keys(::Dict))` and `sort(values(::Dict))` now automatically collect, they previously threw ([#56978]).
3131
* `Base.AbstractOneTo` is added as a supertype of one-based axes, with `Base.OneTo` as its subtype ([#56902]).
32+
* `takestring!(::IOBuffer)` removes the content from the buffer, returning the content as a `String`.
3233

3334
Standard library changes
3435
------------------------

base/exports.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,7 @@ export
680680
split,
681681
string,
682682
strip,
683+
takestring!,
683684
textwidth,
684685
thisind,
685686
titlecase,

base/iobuffer.jl

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,80 @@ function take!(io::IOBuffer)
783783
return data
784784
end
785785

"""
    unsafe_takestring!(io::IOBuffer) -> String

Internal method. Build a `String` from the bytes currently held in `io` without
resetting `io` to a usable state and without checking `io.reinit`, which can make
it faster than `takestring!`.

Using `io` after this call may cause undefined behaviour. Only use this when the
buffer is a temporary string builder that is discarded once the string is built.
"""
function unsafe_takestring!(io::IOBuffer)
    span = get_used_span(io)
    isempty(span) && return ""
    len = length(span)
    start = first(span)
    # The C function can only take bytes from the start of the memory, so any
    # other starting position needs a copy into fresh string memory first.
    # Fortunately, in most cases the used span starts at the first byte.
    if start == 1
        return ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), io.data, len)
    end
    fresh = StringMemory(len % UInt)
    unsafe_copyto!(fresh, 1, io.data, start, len)
    return unsafe_takestring(fresh)
end
806+
"""
    takestring!(io::IOBuffer) -> String

Return the content of `io` as a `String`, resetting the buffer to its initial
state.
Prefer this over `String(take!(io))` when creating a string from an `IOBuffer`.

# Examples
```jldoctest
julia> io = IOBuffer();

julia> write(io, [0x61, 0x62, 0x63]);

julia> s = takestring!(io)
"abc"

julia> isempty(take!(io)) # io is now empty
true
```

!!! compat "Julia 1.13"
    This function requires at least Julia 1.13.
"""
function takestring!(io::IOBuffer)
    # A buffer that has been used up and needs to be replaced holds no bytes, so
    # the result is the empty string and `io` does not need to be touched at all.
    io.reinit && return ""

    if !io.writable
        # Taking from a non-writable buffer does NOT remove the memory from `io`.
        # Converting that memory to a string directly would let garbage collection
        # of the string free the memory underneath the buffer, so copy the bytes.
        span = get_used_span(io)
        copied = StringMemory(length(span))
        unsafe_copyto!(copied, 1, io.data, first(span), length(span))
        return unsafe_takestring(copied)
    end

    # Taking from a writable buffer removes the memory from `io`, so build the
    # string straight from the buffer and then reset the buffer's bookkeeping.
    str = unsafe_takestring!(io)
    io.reinit = true
    io.mark = -1
    io.ptr = 1
    io.size = 0
    io.offset_or_compacted = 0
    return str
end
856+
# Fallback for any other `GenericIOBuffer`: convert the bytes returned by `take!`.
function takestring!(io::GenericIOBuffer)
    return String(take!(io))
end
859+
786860
"""
787861
_unsafe_take!(io::IOBuffer)
788862

base/strings/string.jl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ In other cases, `Vector{UInt8}` data may be copied, but `v` is truncated anyway
6262
to guarantee consistent behavior.
6363
"""
6464
String(v::AbstractVector{UInt8}) = unsafe_takestring(copyto!(StringMemory(length(v)), v))
65+
6566
function String(v::Vector{UInt8})
66-
#return ccall(:jl_array_to_string, Ref{String}, (Any,), v)
6767
len = length(v)
6868
len == 0 && return ""
6969
ref = v.ref
@@ -84,6 +84,24 @@ function unsafe_takestring(m::Memory{UInt8})
8484
isempty(m) ? "" : ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, length(m))
8585
end
8686

"""
    takestring!(x) -> String

Create a string from the content of `x`, emptying `x`.

# Examples
```jldoctest
julia> v = [0x61, 0x62, 0x63];

julia> s = takestring!(v)
"abc"

julia> isempty(v)
true
```

!!! compat "Julia 1.13"
    This function requires at least Julia 1.13.
"""
takestring!(v::Vector{UInt8}) = String(v)
104+
87105
"""
88106
unsafe_string(p::Ptr{UInt8}, [length::Integer])
89107

doc/src/base/strings.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Base.SubstitutionString
2929
Base.@s_str
3030
Base.@raw_str
3131
Base.@b_str
32+
Base.takestring!
3233
Base.Docs.@html_str
3334
Base.Docs.@text_str
3435
Base.isvalid(::Any)

src/genericmemory.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,6 @@ JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_
197197
}
198198
int how = jl_genericmemory_how(m);
199199
size_t mlength = m->length;
200-
m->length = 0;
201200
if (how != 0) {
202201
jl_value_t *o = jl_genericmemory_data_owner_field(m);
203202
jl_genericmemory_data_owner_field(m) = NULL;

test/iobuffer.jl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,36 @@ end
321321
@test_throws ArgumentError seek(io, 0)
322322
end
323323

@testset "takestring!" begin
    # Taking from a writable buffer empties it and leaves it reusable.
    buf = IOBuffer()
    write(buf, "abcø")
    s = takestring!(buf)
    @test isempty(takestring!(buf))
    @test s == "abcø"
    write(buf, "xyz")
    @test takestring!(buf) == "xyz"

    # Test with a nonzero offset in the buffer
    v = rand(UInt8, 8)
    for i in 1:8
        pushfirst!(v, rand(UInt8))
    end
    buf = IOBuffer(v)
    s = String(copy(v))
    @test takestring!(buf) == s

    # Test with a non-writable IOBuffer: the byte already read is still returned
    buf = IOBuffer(b"abcdef")
    read(buf, UInt8)
    @test takestring!(buf) == "abcdef"

    # Test with an unseekable buffer: the two bytes already read are not returned
    buf = new_unseekable_buffer()
    write(buf, "abcde")
    read(buf, UInt16)
    @test takestring!(buf) == "cde"
end
353+
324354
@testset "Read/write readonly IOBuffer" begin
325355
io = IOBuffer("hamster\nguinea pig\nturtle")
326356
@test position(io) == 0

test/strings/basic.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,24 @@ using Random
4949
end
5050
end
5151

@testset "takestring!" begin
    bytes = [0x61, 0x62, 0x63]
    mem_before = bytes.ref.mem
    @test takestring!(bytes) == "abc"
    @test isempty(bytes)
    @test bytes.ref.mem !== mem_before # memory is changed

    # Each input must end up empty, and the resulting string must hold exactly
    # the bytes the vector contained before the call.
    inputs = [
        UInt8[],
        [0x01, 0x02, 0x03],
        collect(codeunits("æøå")),
    ]
    for data in inputs
        expected = copy(data)
        str = takestring!(data)
        @test isempty(data)
        @test codeunits(str) == expected
    end
end
69+
5270
@testset "{starts,ends}with" begin
5371
@test startswith("abcd", 'a')
5472
@test startswith('a')("abcd")

0 commit comments

Comments
 (0)