Skip to content

Commit 90deb1b

Browse files
authored
add transcode (#288)
* add transcode (fixes #284) * fixed method ambig and account for NUL termination of legacy utf16/utf32 functions * resize rather than slice, to avoid making an unnecessary copy * add missing T -> T transcode methods * another transcode test * whoops
1 parent 3541c2d commit 90deb1b

File tree

3 files changed

+39
-0
lines changed

3 files changed

+39
-0
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ Currently, the `@compat` macro supports the following syntaxes:
8787

8888
* `unsafe_get` returns the `:value` field of a `Nullable` object without any null-check and has a generic fallback for non-`Nullable` argument. [#18484](https://github.com/JuliaLang/julia/pull/18484) (Also, `isnull` has a generic fallback for non-`Nullable` argument.)
8989

90+
* `transcode` converts between UTF-xx string encodings in Julia 0.5 (as a lightweight
91+
alternative to the LegacyStrings package). [#17323](https://github.com/JuliaLang/julia/pull/17323)
92+
9093
## Renamed functions
9194

9295
* `pointer_to_array` and `pointer_to_string` have been replaced with `unsafe_wrap(Array, ...)` and `unsafe_wrap(String, ...)` respectively.

src/Compat.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,4 +1698,30 @@ if VERSION < v"0.6.0-dev.1256"
16981698
Base.take!(io::Base.AbstractIOBuffer) = takebuf_array(io)
16991699
end
17001700

1701+
# julia #17323: `transcode` for converting between UTF-8/UTF-16/UTF-32 encodings
1702+
if VERSION < v"0.5.0-dev+5380"
1703+
export transcode
1704+
# Conversions to a string type: strings and UTF-8 bytes go through the legacy `utf8`
# function; UTF-16/UTF-32 code units are first wrapped with `utf16`/`utf32`.
transcode{T<:Compat.String}(::Type{T}, src::Union{Compat.String,Vector{UInt8}}) = utf8(src)
1705+
transcode{T<:Compat.String}(::Type{T}, src::Vector{UInt16}) = utf8(utf16(src))
1706+
transcode{T<:Compat.String}(::Type{T}, src::Vector{UInt32}) = utf8(utf32(src))
1707+
# Conversions to `UInt8`: a `Vector{UInt8}` is returned as-is (the same array, not a
# copy); any other source is transcoded to a string first and its `.data` field returned.
transcode(::Type{UInt8}, src::Vector{UInt8}) = src
1707+
transcode(::Type{UInt8}, src) = transcode(Compat.String, src).data
1709+
# Convert a string (or a `Vector{UInt8}`, passed through `utf8`) to a vector of UTF-16
# code units via the legacy `utf16` function. The legacy result's `.data` ends with a
# NUL code unit, which is dropped by shrinking the vector in place (no copy is made).
function transcode(::Type{UInt16}, src::Union{Compat.String,Vector{UInt8}})
1710+
d = utf16(utf8(src)).data
1711+
return resize!(d, length(d)-1) # strip off trailing NUL codeunit
1712+
end
1713+
# Convert a string (or a `Vector{UInt8}`, passed through `utf8`) to a vector of UTF-32
# code units via the legacy `utf32` function. The legacy result's `.data` ends with a
# NUL code unit, which is dropped by shrinking the vector in place (no copy is made).
function transcode(::Type{UInt32}, src::Union{Compat.String,Vector{UInt8}})
1714+
d = utf32(utf8(src)).data
1715+
return resize!(d, length(d)-1) # strip off trailing NUL codeunit
1716+
end
1717+
# T -> T transcoding is the identity: the input vector itself is returned (no copy).
transcode(::Type{UInt16}, src::Vector{UInt16}) = src
1718+
transcode(::Type{UInt32}, src::Vector{UInt32}) = src
1719+
# Extra methods defined only when `Cwchar_t` is `Int32`: such data is handled by
# reinterpreting it to/from `UInt32` and reusing the `UInt32` methods. The explicit
# `Cwchar_t -> Cwchar_t` and `UInt8` methods sit alongside the generic fallbacks
# (see the "method ambig" fix noted in the commit message).
if Cwchar_t == Int32
1720+
transcode(::Type{Cwchar_t}, src::Vector{Cwchar_t}) = src
1721+
transcode(::Type{Cwchar_t}, src) = reinterpret(Cwchar_t, transcode(UInt32, src))
1722+
transcode(::Type{UInt8}, src::Vector{Cwchar_t}) = transcode(UInt8, reinterpret(UInt32, src))
1723+
transcode(T, src::Vector{Cwchar_t}) = transcode(T, reinterpret(UInt32, src))
1724+
end
1725+
end
1726+
17011727
end # module

test/runtests.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,3 +1524,13 @@ let io = IOBuffer()
15241524
write(io, "bbb")
15251525
@test String(take!(io)) == "bbb"
15261526
end
1527+
1528+
# Tests for `transcode`. The sample string ends in an emoji that takes two UTF-16 code
# units but a single UTF-32 code unit (see the expected vectors below). For each code-unit
# type the loop checks the round trip back to the string, the round trip to `s.data`,
# and that string input, byte input and already-transcoded input all give the same result.
let s = "Koala test: 🐨"
1529+
@test transcode(UInt16, s) == UInt16[75,111,97,108,97,32,116,101,115,116,58,32,55357,56360]
1530+
@test transcode(UInt32, s) == UInt32[75,111,97,108,97,32,116,101,115,116,58,32,128040]
1531+
for T in (UInt8,UInt16,UInt32,Cwchar_t)
1532+
@test transcode(Compat.String, transcode(T, s)) == s
1533+
@test transcode(UInt8, transcode(T, s)) == s.data
1534+
@test transcode(T, s) == transcode(T, s.data) == transcode(T, transcode(T, s))
1535+
end
1536+
end

0 commit comments

Comments
 (0)