-
-
Notifications
You must be signed in to change notification settings - Fork 165
/
Copy path04-unicode.sh
executable file
·107 lines (86 loc) · 2.2 KB
/
04-unicode.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env bash
#
# Unicode demos for the shell.
#
# Usage:
#   demo/04-unicode.sh <function name>
#
# TODO: Test what happens if you read binary data into a $(command sub)
#   - internal NUL
#   - invalid utf-8 sequence
#
# Some of this could perhaps move into test/gold, but the results depend on
# the locale.

# Strict mode: abort on unset variables, on a failing stage anywhere in a
# pipeline, and on any unhandled non-zero exit status.
set -o nounset -o pipefail -o errexit
# https://www.gnu.org/software/bash/manual/bash.html#Shell-Parameter-Expansion
#
# See doc/unicode.txt.
# Print "[μ]" (U+03BC, GREEK SMALL LETTER MU, in square brackets) encoded as
# UTF-8 and followed by a newline.
#
# The bytes are written with printf instead of a Python 2 one-liner: the old
# `print u"..."` statement is a SyntaxError when `python` is Python 3, and the
# call fails outright on machines that have no `python` executable.
#
# Outputs: the five bytes 5B CE BC 5D 0A on stdout.
unicode-char() {
  # 0xCE 0xBC is the UTF-8 encoding of U+03BC.
  printf '[\xce\xbc]\n'
}
# http://stackoverflow.com/questions/602912/how-do-you-echo-a-4-digit-unicode-character-in-bash
# Print U+2620 three times: from hex byte escapes, from a \u escape with the
# builtin echo, and from a \u escape with the external /bin/echo.
echo-char() {
  local utf8_bytes='\xE2\x98\xA0'  # U+2620 spelled as its UTF-8 bytes
  local code_point='\u2620'        # U+2620 spelled as a \u escape

  echo -e "$utf8_bytes"

  # Woah, bash has this!  Interesting.  It is not documented in "help echo"
  # though.
  echo -e "$code_point"

  # GNU echo does not have it.
  /bin/echo -e "$code_point"
}
# Print "[μ]" from a literal, unquoted word stored in this file as raw UTF-8.
raw-char() {
# Use vim to put utf-8 in this source file:
# 1. i to enter Insert mode
# 2. Ctrl-V
# 3. u
# 4. 03bc -- the 4 hex digits of the code point
#
# NOTE(review): the word below is unquoted, so the shell treats [μ] as a glob
# pattern; it is printed literally only when no file in the current directory
# matches it.  Presumably left unquoted on purpose, as the counterpart of
# quoted-chars -- confirm before quoting.
echo [μ]
}
# Print one literal per quoting form, each on its own line.
quoted-chars() {
  local text

  # In order: single quotes, double quotes, a $'...' C-escaped string, and a
  # $"..." locale-translated string.
  #
  # Not implementing the last form:
  # https://www.gnu.org/software/bash/manual/html_node/Locale-Translation.html
  for text in '[μ]' "[μ]" $'[\u03bc]' $"hello"; do
    echo "$text"
  done
}
# Show how bash measures and slices a string that contains a non-ASCII
# character under the current locale.
#
# Outputs: the `locale` report, a blank line, $LANG, then "[μ]" as produced by
# unicode-char (directly and via a variable), its length, and its suffixes
# starting at offsets 0, 1 and 2 -- first for unicode-char's output, then for
# raw-char's.
test-unicode() {
  locale  # displays state
  echo

  # LANG may legitimately be unset (cron, minimal containers).  A bare $LANG
  # would abort the whole script under 'set -o nounset'.
  echo "${LANG-}"

  unicode-char

  # Declare separately from the assignment so that a failing command
  # substitution is not masked by the exit status of 'local'.
  local u
  u=$(unicode-char)

  # Quoted on purpose: the value starts with '[', so an unquoted expansion is
  # a glob pattern and could be replaced by a matching file name.
  echo "$u"

  # This changes bash behavior!
  #LANG=C
  echo "${#u}"  # three chars in a UTF-8 locale

  # OK, bash respects utf-8 when doing string slicing.  Does it have its own
  # unicode support, or does it use libc?
  echo "${u:0}" "${u:1}" "${u:2}"

  u=$(raw-char)
  echo "${u:0}" "${u:1}" "${u:2}"
}
# Feed JSON strings containing \u escapes to Python's json module and print
# what it decodes them to.
#
# The JSON text is produced with printf, and the decoder snippets call print()
# as a function, so every snippet parses under both Python 2 and Python 3.
# (The former Python 2 print statements are a SyntaxError under Python 3.)
#
# The embedded Python must stay at column 0: it is passed verbatim to -c.
json() {
  # A JSON string whose only character is written as the escape \u03bc.
  printf '%s\n' '"\u03bc"' | python -c '
import sys, json
print(json.loads(sys.stdin.read()))
'

  # The \u0000 code point seems to be representable.
  printf '%s\n' '"[\u0000]"' | python -c '
import sys, json
print(repr(json.loads(sys.stdin.read())))
'

  # Works in python3 too.
  printf '%s\n' '"[\u0000]"' | python3 -c '
import sys, json
print(repr(json.loads(sys.stdin.read())))
'
}
# Right now it's split into (Lit_Other '\xce') and (Lit_Other '\xbc'). This is
# fine for most purposes, although we could probably simplify this.
# Hand an unquoted word containing a non-ASCII character to bin/osh, once with
# -n and once without.
osh-literal() {
  local snippet='echo [μ]'

  bin/osh -n -c "$snippet"

  # This works fine
  bin/osh -c "$snippet"
}
"$@"