Skip to content

Commit a90ca9a

Browse files
xal-0 authored and KristofferC committed
Make more types jl_static_show unambiguously (#58512)
Makes more types survive `jl_static_show` unambiguously:

- Symbols
  - Symbols printed in the `:var"foo"` form use raw string escaping, fixing `:var"a\b"`, `:var"a\\"`, `:var"$a"`, etc.
  - Symbols that require parens use parens (`:(=)`, ...)
- Signed integers: Except for `Int`, signed integers print like `Int8(1)`.
- Floats: floats are printed in a naive but reversible (TODO: double check) way. `Inf(16|32|)` and `NaN(16|32|)` are printed, and `Float16`/`Float32` print the type (`Float32(1.5)`). `Float64`s are printed with a trailing `.0` if it is necessary to disambiguate from `Int`.

Fixes #52677, #58484 (comment), #58484 (comment), and the specific case mentioned in #58484.

Improves the situation for round-trip (non-exhaustive list):

- Non-canonical NaNs
- BFloat16
- User-defined primitive types. This one is tricky, because they can have a size different from any type we have literals for.

(cherry picked from commit b03ef6b)
1 parent 4bc9d6a commit a90ca9a

File tree

2 files changed

+157
-20
lines changed

2 files changed

+157
-20
lines changed

src/rtutils.c

Lines changed: 105 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*/
66
#include "platform.h"
77

8+
#include <float.h>
9+
#include <math.h>
810
#include <stdlib.h>
911
#include <stdio.h>
1012
#include <string.h>
@@ -655,12 +657,12 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname
655657
return 0;
656658
}
657659

658-
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT
660+
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap, int raw) JL_NOTSAFEPOINT
659661
{
660662
size_t n = 0;
661663
if (wrap)
662664
n += jl_printf(out, "\"");
663-
if (!u8_isvalid(str, len)) {
665+
if (!raw && !u8_isvalid(str, len)) {
664666
// alternate print algorithm that preserves data if it's not UTF-8
665667
static const char hexdig[] = "0123456789abcdef";
666668
for (size_t i = 0; i < len; i++) {
@@ -677,7 +679,11 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
677679
int special = 0;
678680
for (size_t i = 0; i < len; i++) {
679681
uint8_t c = str[i];
680-
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') {
682+
if (raw && ((c == '\\' && i == len-1) || c == '"')) {
683+
special = 1;
684+
break;
685+
}
686+
else if (!raw && (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$')) {
681687
special = 1;
682688
break;
683689
}
@@ -686,6 +692,25 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
686692
jl_uv_puts(out, str, len);
687693
n += len;
688694
}
695+
else if (raw) {
696+
// REF: Base.escape_raw_string
697+
int escapes = 0;
698+
for (size_t i = 0; i < len; i++) {
699+
uint8_t c = str[i];
700+
if (c == '\\') {
701+
escapes++;
702+
}
703+
else {
704+
if (c == '"')
705+
for (escapes++; escapes > 0; escapes--)
706+
n += jl_printf(out, "\\");
707+
escapes = 0;
708+
}
709+
n += jl_printf(out, "%c", str[i]);
710+
}
711+
for (; escapes > 0; escapes--)
712+
n += jl_printf(out, "\\");
713+
}
689714
else {
690715
char buf[512];
691716
size_t i = 0;
@@ -701,18 +726,28 @@ static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len,
701726
return n;
702727
}
703728

729+
// Returns 1 when `sn` is exactly equal (strcmp) to one of the operator
// spellings in `quoted_syms` below, and 0 otherwise.
//
// `sn` must be a NUL-terminated string; it is only read.
// The visible caller, jl_static_show_symbol, prints a name bare only when it
// is an identifier or an operator that is NOT in this table, so every entry
// here is routed to that function's non-bare branch.
static int jl_is_quoted_sym(const char *sn)
730+
{
731+
// Operator names that cannot be printed bare after a `:`.
static const char *const quoted_syms[] = {":", "::", ":=", "=", "==", "===", "=>", "`"};
732+
// Linear scan; the sizeof ratio is the number of entries in the table.
for (int i = 0; i < sizeof quoted_syms / sizeof *quoted_syms; i++)
733+
if (!strcmp(sn, quoted_syms[i]))
734+
return 1;
735+
return 0;
736+
}
737+
738+
// TODO: in theory, we need a separate function for showing symbols in an
739+
// expression context (where `Symbol("foo\x01bar")` is ok) and a syntactic
740+
// context (where var"" must be used).
704741
static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
705742
{
706743
size_t n = 0;
707744
const char *sn = jl_symbol_name(name);
708-
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn);
709-
if (quoted) {
710-
n += jl_printf(out, "var");
711-
// TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules
712-
n += jl_static_show_string(out, sn, strlen(sn), 1);
745+
if (jl_is_identifier(sn) || (jl_is_operator(sn) && !jl_is_quoted_sym(sn))) {
746+
n += jl_printf(out, "%s", sn);
713747
}
714748
else {
715-
n += jl_printf(out, "%s", sn);
749+
n += jl_printf(out, "var");
750+
n += jl_static_show_string(out, sn, strlen(sn), 1, 1);
716751
}
717752
return n;
718753
}
@@ -741,6 +776,51 @@ static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT {
741776
return 0;
742777
}
743778

779+
// Writes the floating-point value `v` to `out`, spelled according to the
// Julia float type `vt`, so that the width of the type stays visible in the
// text:
//   NaN                 -> "NaN"  followed by "16", "32" or "" (by `vt`)
//   +/- infinity        -> optional "-", "Inf", then the same suffix
//   jl_float64_type     -> "%#.17g"
//   jl_float32_type     -> "%.9g" with the exponent marker 'e' turned into
//                          'f', or with "f0" appended when there is none
//   anything else       -> asserted to be jl_float16_type, "Float16(%#.5g)"
//
// out: stream to print to.
// v:   the value, already widened to double by the caller.
// vt:  selects the branch and the NaN/Inf suffix; only compared against
//      jl_float16_type, jl_float32_type and jl_float64_type.
//
// Returns the running total `n`: the sum of the jl_printf return values plus,
// on the Float32 path, the snprintf length handed to jl_uv_puts.
static size_t jl_static_show_float(JL_STREAM *out, double v,
780+
jl_datatype_t *vt) JL_NOTSAFEPOINT
781+
{
782+
size_t n = 0;
783+
// TODO: non-canonical NaNs do not round-trip
784+
// TODO: BFloat16
785+
// Suffix used for the NaN/Inf spellings: "16", "32", or empty for any other type.
const char *size_suffix = vt == jl_float16_type ? "16" :
786+
vt == jl_float32_type ? "32" :
787+
"";
788+
// Requires minimum 1 (sign) + 17 (sig) + 1 (dot) + 5 ("e-123") + 1 (null)
// (only the Float32 branch below actually formats into this buffer)
789+
char buf[32];
790+
// Base B significand digits required to print n base-b significand bits
791+
// (including leading 1): N = 2 + floor(n/log(b, B))
792+
// Float16 5
793+
// Float32 9
794+
// Float64 17
795+
// REF: https://dl.acm.org/doi/pdf/10.1145/93542.93559
796+
if (isnan(v)) {
797+
n += jl_printf(out, "NaN%s", size_suffix);
798+
}
799+
else if (isinf(v)) {
800+
n += jl_printf(out, "%sInf%s", v < 0 ? "-" : "", size_suffix);
801+
}
802+
else if (vt == jl_float64_type) {
803+
// The '#' flag keeps the decimal point (and trailing zeros) in %g output,
// so a whole-number value is not printed like an integer.
n += jl_printf(out, "%#.17g", v);
804+
}
805+
else if (vt == jl_float32_type) {
806+
size_t m = snprintf(buf, sizeof buf, "%.9g", v);
807+
// If an exponent was printed, replace its 'e' marker with 'f'
808+
char *p = (char *)memchr(buf, 'e', m);
809+
if (p)
810+
*p = 'f';
811+
jl_uv_puts(out, buf, m);
812+
n += m;
813+
// If no exponent was printed, we must add one
814+
if (!p)
815+
n += jl_printf(out, "f0");
816+
}
817+
else {
818+
// Remaining case: printed with an explicit Float16(...) wrapper.
assert(vt == jl_float16_type);
819+
n += jl_printf(out, "Float16(%#.5g)", v);
820+
}
821+
return n;
822+
}
823+
744824
// `v` might be pointing to a field inlined in a structure therefore
745825
// `jl_typeof(v)` may not be the same with `vt` and only `vt` should be
746826
// used to determine the type of the value.
@@ -906,17 +986,21 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
906986
int f = *(uint32_t*)jl_data_ptr(v);
907987
n += jl_printf(out, "#<intrinsic #%d %s>", f, jl_intrinsic_name(f));
908988
}
989+
else if (vt == jl_long_type) {
990+
// Avoid unnecessary Int64(x)/Int32(x)
991+
n += jl_printf(out, "%" PRIdPTR, *(intptr_t*)v);
992+
}
909993
else if (vt == jl_int64_type) {
910-
n += jl_printf(out, "%" PRId64, *(int64_t*)v);
994+
n += jl_printf(out, "Int64(%" PRId64 ")", *(int64_t*)v);
911995
}
912996
else if (vt == jl_int32_type) {
913-
n += jl_printf(out, "%" PRId32, *(int32_t*)v);
997+
n += jl_printf(out, "Int32(%" PRId32 ")", *(int32_t*)v);
914998
}
915999
else if (vt == jl_int16_type) {
916-
n += jl_printf(out, "%" PRId16, *(int16_t*)v);
1000+
n += jl_printf(out, "Int16(%" PRId16 ")", *(int16_t*)v);
9171001
}
9181002
else if (vt == jl_int8_type) {
919-
n += jl_printf(out, "%" PRId8, *(int8_t*)v);
1003+
n += jl_printf(out, "Int8(%" PRId8 ")", *(int8_t*)v);
9201004
}
9211005
else if (vt == jl_uint64_type) {
9221006
n += jl_printf(out, "0x%016" PRIx64, *(uint64_t*)v);
@@ -937,11 +1021,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
9371021
n += jl_printf(out, "0x%08" PRIx32, *(uint32_t*)v);
9381022
#endif
9391023
}
1024+
else if (vt == jl_float16_type) {
1025+
n += jl_static_show_float(out, julia__gnu_h2f_ieee(*(uint16_t *)v), vt);
1026+
}
9401027
else if (vt == jl_float32_type) {
941-
n += jl_printf(out, "%gf", *(float*)v);
1028+
n += jl_static_show_float(out, *(float *)v, vt);
9421029
}
9431030
else if (vt == jl_float64_type) {
944-
n += jl_printf(out, "%g", *(double*)v);
1031+
n += jl_static_show_float(out, *(double *)v, vt);
9451032
}
9461033
else if (vt == jl_bool_type) {
9471034
n += jl_printf(out, "%s", *(uint8_t*)v ? "true" : "false");
@@ -950,7 +1037,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
9501037
n += jl_printf(out, "nothing");
9511038
}
9521039
else if (vt == jl_string_type) {
953-
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1);
1040+
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1, 0);
9541041
}
9551042
else if (v == jl_bottom_type) {
9561043
n += jl_printf(out, "Union{}");
@@ -1442,10 +1529,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
14421529
}
14431530
jl_printf(str, "\n@ ");
14441531
if (jl_is_string(file)) {
1445-
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0);
1532+
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0, 0);
14461533
}
14471534
else if (jl_is_symbol(file)) {
1448-
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0);
1535+
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0, 0);
14491536
}
14501537
jl_printf(str, ":");
14511538
jl_static_show(str, line);

test/show.jl

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,7 @@ let oldout = stdout, olderr = stderr
696696
redirect_stderr(olderr)
697697
close(wrout)
698698
close(wrerr)
699-
@test fetch(out) == "Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123\"C\"\n"
699+
@test fetch(out) == "Int64 <: Signed\nTESTA\nTESTB\nΑ1Β2\"A\"\nA\n123.0000000000000000\"C\"\n"
700700
@test fetch(err) == "TESTA\nTESTB\nΑ1Β2\"A\"\n"
701701
finally
702702
redirect_stdout(oldout)
@@ -1489,8 +1489,58 @@ struct var"%X%" end # Invalid name without '#'
14891489
typeof(+),
14901490
var"#f#",
14911491
typeof(var"#f#"),
1492+
1493+
# Integers should round-trip (#52677)
1494+
1, UInt(1),
1495+
Int8(1), Int16(1), Int32(1), Int64(1),
1496+
UInt8(1), UInt16(1), UInt32(1), UInt64(1),
1497+
1498+
# Float round-trip
1499+
Float16(1), Float32(1), Float64(1),
1500+
Float16(1.5), Float32(1.5), Float64(1.5),
1501+
Float16(0.4893243538921085), Float32(0.4893243538921085), Float64(0.4893243538921085),
1502+
# Examples that require the full 5, 9, and 17 digits of precision
1503+
Float16(0.00010014), Float32(1.00000075f-36), Float64(-1.561051336605761e-182),
1504+
floatmax(Float16), floatmax(Float32), floatmax(Float64),
1505+
floatmin(Float16), floatmin(Float32), floatmin(Float64),
1506+
Float16(0.0), 0.0f0, 0.0,
1507+
Float16(-0.0), -0.0f0, -0.0,
1508+
Inf16, Inf32, Inf,
1509+
-Inf16, -Inf32, -Inf,
1510+
nextfloat(Float16(0)), nextfloat(Float32(0)), nextfloat(Float64(0)),
1511+
NaN16, NaN32, NaN,
1512+
Float16(1e3), 1f7, 1e16,
1513+
Float16(-1e3), -1f7, -1e16,
1514+
Float16(1e4), 1f8, 1e17,
1515+
Float16(-1e4), -1f8, -1e17,
1516+
1517+
# :var"" escaping rules differ from strings (#58484)
1518+
:foo,
1519+
:var"bar baz",
1520+
:var"a $b", # No escaping for $ in raw string
1521+
:var"a\b", # No escaping for backslashes in middle
1522+
:var"a\\", # Backslashes must be escaped at the end
1523+
:var"a\\\\",
1524+
:var"a\"b",
1525+
:var"a\"",
1526+
:var"\\\"",
1527+
:+, :var"+-",
1528+
:(=), :(:), :(::), # Requires quoting
1529+
Symbol("a\nb"),
1530+
1531+
Val(Float16(1.0)), Val(1f0), Val(1.0),
1532+
Val(:abc), Val(:(=)), Val(:var"a\b"),
1533+
1534+
Val(1), Val(Int8(1)), Val(Int16(1)), Val(Int32(1)), Val(Int64(1)), Val(Int128(1)),
1535+
Val(UInt(1)), Val(UInt8(1)), Val(UInt16(1)), Val(UInt32(1)), Val(UInt64(1)), Val(UInt128(1)),
1536+
1537+
# BROKEN
1538+
# Symbol("a\xffb"),
1539+
# User-defined primitive types
1540+
# Non-canonical NaNs
1541+
# BFloat16
14921542
)
1493-
@test v == eval(Meta.parse(static_shown(v)))
1543+
@test v === eval(Meta.parse(static_shown(v)))
14941544
end
14951545
end
14961546

0 commit comments

Comments
 (0)