Skip to content

Commit 9f9a8ec

Browse files
vtjnash authored and KristofferC committed
static-show: improve accuracy of some printings (#52799)
- Show strings with escaping, rather than trying to output the text unmodified.
- Show symbols with the same formatting as Strings
- Avoid accidentally defining a broken Core.show method for NamedTuple

(cherry picked from commit bd3eab6)
1 parent 99b6c16 commit 9f9a8ec

File tree

10 files changed

+126
-78
lines changed

10 files changed

+126
-78
lines changed

base/namedtuple.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,8 @@ function convert(::Type{NT}, nt::NamedTuple{names}) where {names, NT<:NamedTuple
193193
end
194194

195195
if nameof(@__MODULE__) === :Base
196-
Tuple(nt::NamedTuple) = (nt...,)
197-
(::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)
198-
end
196+
Tuple(nt::NamedTuple) = (nt...,)
197+
(::Type{T})(nt::NamedTuple) where {T <: Tuple} = (t = Tuple(nt); t isa T ? t : convert(T, t)::T)
199198

200199
function show(io::IO, t::NamedTuple)
201200
n = nfields(t)
@@ -229,6 +228,7 @@ function show(io::IO, t::NamedTuple)
229228
print(io, ")")
230229
end
231230
end
231+
end
232232

233233
eltype(::Type{T}) where T<:NamedTuple = nteltype(T)
234234
nteltype(::Type) = Any

base/show.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1744,7 +1744,7 @@ function show_sym(io::IO, sym::Symbol; allow_macroname=false)
17441744
print(io, '@')
17451745
show_sym(io, Symbol(sym_str[2:end]))
17461746
else
1747-
print(io, "var", repr(string(sym)))
1747+
print(io, "var", repr(string(sym))) # TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules
17481748
end
17491749
end
17501750

src/ast.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
942942
return expr;
943943
}
944944

945-
JL_DLLEXPORT int jl_is_operator(char *sym)
945+
JL_DLLEXPORT int jl_is_operator(const char *sym)
946946
{
947947
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
948948
fl_context_t *fl_ctx = &ctx->fl;
@@ -951,7 +951,7 @@ JL_DLLEXPORT int jl_is_operator(char *sym)
951951
return res;
952952
}
953953

954-
JL_DLLEXPORT int jl_is_unary_operator(char *sym)
954+
JL_DLLEXPORT int jl_is_unary_operator(const char *sym)
955955
{
956956
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
957957
fl_context_t *fl_ctx = &ctx->fl;
@@ -960,7 +960,7 @@ JL_DLLEXPORT int jl_is_unary_operator(char *sym)
960960
return res;
961961
}
962962

963-
JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
963+
JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym)
964964
{
965965
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
966966
fl_context_t *fl_ctx = &ctx->fl;
@@ -969,7 +969,7 @@ JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
969969
return res;
970970
}
971971

972-
JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
972+
JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym)
973973
{
974974
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
975975
fl_context_t *fl_ctx = &ctx->fl;
@@ -978,7 +978,7 @@ JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
978978
return res;
979979
}
980980

981-
JL_DLLEXPORT int jl_operator_precedence(char *sym)
981+
JL_DLLEXPORT int jl_operator_precedence(const char *sym)
982982
{
983983
jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
984984
fl_context_t *fl_ctx = &ctx->fl;

src/flisp/print.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ static void print_string(fl_context_t *fl_ctx, ios_t *f, char *str, size_t sz)
518518
}
519519
else {
520520
while (i < sz) {
521-
size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, 1, 0);
521+
size_t n = u8_escape(buf, sizeof(buf), str, &i, sz, "\"", 0);
522522
outsn(fl_ctx, buf, f, n-1);
523523
}
524524
}

src/julia.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1955,11 +1955,11 @@ JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms);
19551955
JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
19561956

19571957

1958-
JL_DLLEXPORT int jl_is_operator(char *sym);
1959-
JL_DLLEXPORT int jl_is_unary_operator(char *sym);
1960-
JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym);
1961-
JL_DLLEXPORT int jl_is_syntactic_operator(char *sym);
1962-
JL_DLLEXPORT int jl_operator_precedence(char *sym);
1958+
JL_DLLEXPORT int jl_is_operator(const char *sym);
1959+
JL_DLLEXPORT int jl_is_unary_operator(const char *sym);
1960+
JL_DLLEXPORT int jl_is_unary_and_binary_operator(const char *sym);
1961+
JL_DLLEXPORT int jl_is_syntactic_operator(const char *sym);
1962+
JL_DLLEXPORT int jl_operator_precedence(const char *sym);
19631963

19641964
STATIC_INLINE int jl_vinfo_sa(uint8_t vi)
19651965
{

src/rtutils.c

Lines changed: 90 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ static size_t jl_show_svec(JL_STREAM *out, jl_svec_t *t, const char *head, const
572572
JL_DLLEXPORT int jl_id_start_char(uint32_t wc) JL_NOTSAFEPOINT;
573573
JL_DLLEXPORT int jl_id_char(uint32_t wc) JL_NOTSAFEPOINT;
574574

575-
JL_DLLEXPORT int jl_is_identifier(char *str) JL_NOTSAFEPOINT
575+
JL_DLLEXPORT int jl_is_identifier(const char *str) JL_NOTSAFEPOINT
576576
{
577577
size_t i = 0;
578578
uint32_t wc = u8_nextchar(str, &i);
@@ -655,22 +655,64 @@ static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname
655655
return 0;
656656
}
657657

658-
static size_t jl_static_show_x_sym_escaped(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
658+
static size_t jl_static_show_string(JL_STREAM *out, const char *str, size_t len, int wrap) JL_NOTSAFEPOINT
659659
{
660660
size_t n = 0;
661-
662-
char *sn = jl_symbol_name(name);
663-
int hidden = 0;
664-
if (!(jl_is_identifier(sn) || jl_is_operator(sn))) {
665-
hidden = 1;
661+
if (wrap)
662+
n += jl_printf(out, "\"");
663+
if (!u8_isvalid(str, len)) {
664+
// alternate print algorithm that preserves data if it's not UTF-8
665+
static const char hexdig[] = "0123456789abcdef";
666+
for (size_t i = 0; i < len; i++) {
667+
uint8_t c = str[i];
668+
if (c == '\\' || c == '"' || c == '$')
669+
n += jl_printf(out, "\\%c", c);
670+
else if (c >= 32 && c < 0x7f)
671+
n += jl_printf(out, "%c", c);
672+
else
673+
n += jl_printf(out, "\\x%c%c", hexdig[c>>4], hexdig[c&0xf]);
674+
}
666675
}
667-
668-
if (hidden) {
669-
n += jl_printf(out, "var\"");
676+
else {
677+
int special = 0;
678+
for (size_t i = 0; i < len; i++) {
679+
uint8_t c = str[i];
680+
if (c < 32 || c == 0x7f || c == '\\' || c == '"' || c == '$') {
681+
special = 1;
682+
break;
683+
}
684+
}
685+
if (!special) {
686+
jl_uv_puts(out, str, len);
687+
n += len;
688+
}
689+
else {
690+
char buf[512];
691+
size_t i = 0;
692+
while (i < len) {
693+
size_t r = u8_escape(buf, sizeof(buf), str, &i, len, "\"$", 0);
694+
jl_uv_puts(out, buf, r - 1);
695+
n += r - 1;
696+
}
697+
}
670698
}
671-
n += jl_printf(out, "%s", sn);
672-
if (hidden) {
699+
if (wrap)
673700
n += jl_printf(out, "\"");
701+
return n;
702+
}
703+
704+
static size_t jl_static_show_symbol(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
705+
{
706+
size_t n = 0;
707+
const char *sn = jl_symbol_name(name);
708+
int quoted = !jl_is_identifier(sn) && !jl_is_operator(sn);
709+
if (quoted) {
710+
n += jl_printf(out, "var");
711+
// TODO: this is not quite right, since repr uses String escaping rules, and Symbol uses raw string rules
712+
n += jl_static_show_string(out, sn, strlen(sn), 1);
713+
}
714+
else {
715+
n += jl_printf(out, "%s", sn);
674716
}
675717
return n;
676718
}
@@ -788,11 +830,6 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
788830
// Types are printed as a fully qualified name, with parameters, e.g.
789831
// `Base.Set{Int}`, and function types are printed as e.g. `typeof(Main.f)`
790832
jl_datatype_t *dv = (jl_datatype_t*)v;
791-
jl_sym_t *globname;
792-
int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
793-
jl_sym_t *sym = globfunc ? globname : dv->name->name;
794-
char *sn = jl_symbol_name(sym);
795-
size_t quote = 0;
796833
if (dv->name == jl_tuple_typename) {
797834
if (dv == jl_tuple_type)
798835
return jl_printf(out, "Tuple");
@@ -825,8 +862,13 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
825862
return n;
826863
}
827864
if (ctx.quiet) {
828-
return jl_printf(out, "%s", jl_symbol_name(dv->name->name));
865+
return jl_static_show_symbol(out, dv->name->name);
829866
}
867+
jl_sym_t *globname;
868+
int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
869+
jl_sym_t *sym = globfunc ? globname : dv->name->name;
870+
char *sn = jl_symbol_name(sym);
871+
size_t quote = 0;
830872
if (globfunc) {
831873
n += jl_printf(out, "typeof(");
832874
}
@@ -839,7 +881,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
839881
quote = 1;
840882
}
841883
}
842-
n += jl_static_show_x_sym_escaped(out, sym);
884+
n += jl_static_show_symbol(out, sym);
843885
if (globfunc) {
844886
n += jl_printf(out, ")");
845887
if (quote) {
@@ -908,9 +950,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
908950
n += jl_printf(out, "nothing");
909951
}
910952
else if (vt == jl_string_type) {
911-
n += jl_printf(out, "\"");
912-
jl_uv_puts(out, jl_string_data(v), jl_string_len(v)); n += jl_string_len(v);
913-
n += jl_printf(out, "\"");
953+
n += jl_static_show_string(out, jl_string_data(v), jl_string_len(v), 1);
914954
}
915955
else if (v == jl_bottom_type) {
916956
n += jl_printf(out, "Union{}");
@@ -959,7 +999,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
959999
n += jl_printf(out, ")");
9601000
n += jl_printf(out, "<:");
9611001
}
962-
n += jl_static_show_x_sym_escaped(out, var->name);
1002+
n += jl_static_show_symbol(out, var->name);
9631003
if (showbounds && (ub != (jl_value_t*)jl_any_type || lb != jl_bottom_type)) {
9641004
// show type-var upper bound if it is defined, or if we showed the lower bound
9651005
int ua = jl_is_unionall(ub);
@@ -977,27 +1017,24 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
9771017
n += jl_static_show_x(out, (jl_value_t*)m->parent, depth, ctx);
9781018
n += jl_printf(out, ".");
9791019
}
980-
n += jl_printf(out, "%s", jl_symbol_name(m->name));
1020+
n += jl_static_show_symbol(out, m->name);
9811021
}
9821022
else if (vt == jl_symbol_type) {
983-
char *sn = jl_symbol_name((jl_sym_t*)v);
984-
int quoted = !jl_is_identifier(sn) && jl_operator_precedence(sn) == 0;
985-
if (quoted)
986-
n += jl_printf(out, "Symbol(\"");
987-
else
988-
n += jl_printf(out, ":");
989-
n += jl_printf(out, "%s", sn);
990-
if (quoted)
991-
n += jl_printf(out, "\")");
1023+
n += jl_printf(out, ":");
1024+
n += jl_static_show_symbol(out, (jl_sym_t*)v);
9921025
}
9931026
else if (vt == jl_ssavalue_type) {
9941027
n += jl_printf(out, "SSAValue(%" PRIuPTR ")",
9951028
(uintptr_t)((jl_ssavalue_t*)v)->id);
9961029
}
9971030
else if (vt == jl_globalref_type) {
9981031
n += jl_static_show_x(out, (jl_value_t*)jl_globalref_mod(v), depth, ctx);
999-
char *name = jl_symbol_name(jl_globalref_name(v));
1000-
n += jl_printf(out, jl_is_identifier(name) ? ".%s" : ".:(%s)", name);
1032+
jl_sym_t *name = jl_globalref_name(v);
1033+
n += jl_printf(out, ".");
1034+
if (jl_is_operator(jl_symbol_name(name)))
1035+
n += jl_printf(out, ":(%s)", jl_symbol_name(name));
1036+
else
1037+
n += jl_static_show_symbol(out, name);
10011038
}
10021039
else if (vt == jl_gotonode_type) {
10031040
n += jl_printf(out, "goto %" PRIuPTR, jl_gotonode_label(v));
@@ -1031,17 +1068,17 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
10311068
else if (vt == jl_expr_type) {
10321069
jl_expr_t *e = (jl_expr_t*)v;
10331070
if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) {
1034-
n += jl_static_show_x(out, jl_exprarg(e,0), depth, ctx);
1071+
n += jl_static_show_x(out, jl_exprarg(e, 0), depth, ctx);
10351072
n += jl_printf(out, " = ");
1036-
n += jl_static_show_x(out, jl_exprarg(e,1), depth, ctx);
1073+
n += jl_static_show_x(out, jl_exprarg(e, 1), depth, ctx);
10371074
}
10381075
else {
1039-
char sep = ' ';
1040-
n += jl_printf(out, "Expr(:%s", jl_symbol_name(e->head));
1076+
n += jl_printf(out, "Expr(");
1077+
n += jl_static_show_x(out, (jl_value_t*)e->head, depth, ctx);
10411078
size_t i, len = jl_array_len(e->args);
10421079
for (i = 0; i < len; i++) {
1043-
n += jl_printf(out, ",%c", sep);
1044-
n += jl_static_show_x(out, jl_exprarg(e,i), depth, ctx);
1080+
n += jl_printf(out, ", ");
1081+
n += jl_static_show_x(out, jl_exprarg(e, i), depth, ctx);
10451082
}
10461083
n += jl_printf(out, ")");
10471084
}
@@ -1128,7 +1165,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
11281165
}
11291166
}
11301167

1131-
n += jl_static_show_x_sym_escaped(out, sym);
1168+
n += jl_static_show_symbol(out, sym);
11321169

11331170
if (globfunc) {
11341171
if (quote) {
@@ -1164,8 +1201,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
11641201
jl_value_t *names = isnamedtuple ? jl_tparam0(vt) : (jl_value_t*)jl_field_names(vt);
11651202
for (; i < tlen; i++) {
11661203
if (!istuple) {
1167-
jl_value_t *fname = isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i);
1168-
n += jl_printf(out, "%s=", jl_symbol_name((jl_sym_t*)fname));
1204+
jl_sym_t *fname = (jl_sym_t*)(isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i));
1205+
if (fname == NULL || !jl_is_symbol(fname))
1206+
n += jl_static_show_x(out, (jl_value_t*)fname, depth, ctx);
1207+
else if (jl_is_operator(jl_symbol_name(fname)))
1208+
n += jl_printf(out, "(%s)", jl_symbol_name(fname));
1209+
else
1210+
n += jl_static_show_symbol(out, fname);
1211+
n += jl_printf(out, "=");
11691212
}
11701213
size_t offs = jl_field_offset(vt, i);
11711214
char *fld_ptr = (char*)v + offs;
@@ -1300,7 +1343,7 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c
13001343
if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) &&
13011344
((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt &&
13021345
((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) {
1303-
n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name));
1346+
n += jl_static_show_symbol(s, ((jl_datatype_t*)ftype)->name->mt->name);
13041347
}
13051348
else {
13061349
n += jl_printf(s, "(::");
@@ -1399,10 +1442,10 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
13991442
}
14001443
jl_printf(str, "\n@ ");
14011444
if (jl_is_string(file)) {
1402-
jl_uv_puts(str, jl_string_data(file), jl_string_len(file));
1445+
jl_static_show_string(str, jl_string_data(file), jl_string_len(file), 0);
14031446
}
14041447
else if (jl_is_symbol(file)) {
1405-
jl_printf(str, "%s", jl_symbol_name((jl_sym_t*)file));
1448+
jl_static_show_string(str, jl_symbol_name((jl_sym_t*)file), strlen(jl_symbol_name((jl_sym_t*)file)), 0);
14061449
}
14071450
jl_printf(str, ":");
14081451
jl_static_show(str, line);

src/support/utf8.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ int u8_escape_wchar(char *buf, size_t sz, uint32_t ch)
410410
}
411411

412412
size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
413-
int escape_quotes, int ascii)
413+
const char *escapes, int ascii)
414414
{
415415
size_t i = *pi, i0;
416416
uint32_t ch;
@@ -420,12 +420,9 @@ size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t end,
420420

421421
while (i<end && buf<blim) {
422422
// sz-11: leaves room for longest escape sequence
423-
if (escape_quotes && src[i] == '"') {
424-
buf += buf_put2c(buf, "\\\"");
425-
i++;
426-
}
427-
else if (src[i] == '\\') {
428-
buf += buf_put2c(buf, "\\\\");
423+
if ((src[i] == '\\') || (escapes && strchr(escapes, src[i]))) {
424+
*buf++ = '\\';
425+
*buf++ = src[i];
429426
i++;
430427
}
431428
else {
@@ -571,8 +568,8 @@ int u8_isvalid(const char *str, size_t len)
571568
return 0;
572569
// Check for surrogate chars
573570
if (byt == 0xed && *pnt > 0x9f) return 0;
574-
// Check for overlong encoding
575-
if (byt == 0xe0 && *pnt < 0xa0) return 0;
571+
// Check for overlong encoding
572+
if (byt == 0xe0 && *pnt < 0xa0) return 0;
576573
pnt += 2;
577574
} else { // 4-byte sequence
578575
// Must have 3 valid continuation characters

0 commit comments

Comments
 (0)