Skip to content

Commit ce33a55

Browse files
committed
Reapply "add Floki.css_escape/1 (#594)"
This reverts commit 9ce1d1b.
1 parent 4032396 commit ce33a55

File tree

3 files changed

+171
-0
lines changed

3 files changed

+171
-0
lines changed

lib/floki.ex

+15
Original file line numberDiff line numberDiff line change
@@ -790,4 +790,19 @@ defmodule Floki do
790790
def filter_out(elements, selector) do
791791
FilterOut.filter_out(elements, selector)
792792
end
793+
794+
@doc """
Escapes a string for use as a CSS identifier.

This is the public entry point; the work is delegated to
`Floki.CSSEscape.escape/1`.

## Examples

    iex> Floki.css_escape("hello world")
    "hello\\\\ world"

    iex> Floki.css_escape("-123")
    "-\\\\31 23"

"""
@spec css_escape(String.t()) :: String.t()
defdelegate css_escape(value), to: Floki.CSSEscape, as: :escape
793808
end

lib/floki/css_escape.ex

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
defmodule Floki.CSSEscape do
  @moduledoc false

  # Elixir port of the CSS.escape polyfill:
  # https://github.com/mathiasbynens/CSS.escape/blob/master/css.escape.js

  @doc """
  Escapes a string for use as a CSS identifier.

  Raises `ArgumentError` when `value` is not a binary.

  ## Examples

      iex> Floki.CSSEscape.escape("hello world")
      "hello\\\\ world"

      iex> Floki.CSSEscape.escape("-123")
      "-\\\\31 23"

  """
  @spec escape(String.t()) :: String.t()
  def escape(value) when is_binary(value) do
    codepoints = String.to_charlist(value)
    IO.iodata_to_binary(escape_leading(codepoints))
  end

  def escape(_), do: raise(ArgumentError, "CSS.escape requires a string argument")

  # The first one or two code points have position-dependent rules; everything
  # after them is escaped independently of its position.

  # A string consisting solely of `-` (U+002D) gets the hyphen itself escaped.
  defp escape_leading([?-]), do: ["\\-"]

  # A digit [0-9] in first position is written as a code point escape.
  defp escape_leading([digit | tail]) when digit in ?0..?9 do
    [hex_escape(digit) | Enum.map(tail, &escape_codepoint/1)]
  end

  # A digit [0-9] in second position directly after a leading `-` is written
  # as a code point escape as well; the hyphen is kept verbatim.
  defp escape_leading([?-, digit | tail]) when digit in ?0..?9 do
    ["-", hex_escape(digit) | Enum.map(tail, &escape_codepoint/1)]
  end

  defp escape_leading(codepoints), do: Enum.map(codepoints, &escape_codepoint/1)

  # NULL (U+0000) becomes the REPLACEMENT CHARACTER (U+FFFD).
  defp escape_codepoint(0), do: <<0xFFFD::utf8>>

  # Control characters U+0001..U+001F and U+007F are written as code point escapes.
  defp escape_codepoint(codepoint) when codepoint in 0x0001..0x001F or codepoint == 0x007F do
    hex_escape(codepoint)
  end

  # Anything >= U+0080, `-`, `_`, [0-9], [A-Z] and [a-z] is emitted as-is.
  defp escape_codepoint(codepoint)
       when codepoint >= 0x0080 or codepoint in [?-, ?_] or codepoint in ?0..?9 or
              codepoint in ?A..?Z or codepoint in ?a..?z do
    <<codepoint::utf8>>
  end

  # Every remaining character is backslash-escaped.
  # https://drafts.csswg.org/cssom/#escape-a-character
  defp escape_codepoint(codepoint), do: ["\\", <<codepoint::utf8>>]

  # Backslash, the code point in base 16 as produced by `Integer.to_string/2`,
  # then a single space that terminates the escape.
  # https://drafts.csswg.org/cssom/#escape-a-character-as-code-point
  defp hex_escape(codepoint), do: ["\\", Integer.to_string(codepoint, 16), " "]
end

test/floki/css_escape_test.exs

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
defmodule Floki.CSSEscapeTest do
  use ExUnit.Case, async: true

  alias Floki.CSSEscape

  doctest Floki.CSSEscape

  # Expected hex escapes for the digits 0..9, in order.
  @digit_escapes ~w(30 31 32 33 34 35 36 37 38 39)

  test "null character" do
    # U+0000 is replaced by U+FFFD wherever it appears.
    cases = [
      {<<0>>, <<0xFFFD::utf8>>},
      {"a\u0000", "a\ufffd"},
      {"\u0000b", "\ufffdb"},
      {"a\u0000b", "a\ufffdb"}
    ]

    for {input, expected} <- cases do
      assert CSSEscape.escape(input) == expected
    end
  end

  test "replacement character" do
    # U+FFFD itself passes through untouched.
    for input <- [<<0xFFFD::utf8>>, "a\ufffd", "\ufffdb", "a\ufffdb"] do
      assert CSSEscape.escape(input) == input
    end
  end

  test "invalid input" do
    assert_raise ArgumentError, fn -> CSSEscape.escape(nil) end
  end

  test "control characters" do
    assert CSSEscape.escape("\u0001\u0002\u001E\u001F") == "\\1 \\2 \\1E \\1F "
  end

  test "leading digit" do
    for {hex, digit} <- Enum.with_index(@digit_escapes) do
      assert CSSEscape.escape("#{digit}a") == "\\#{hex} a"
    end
  end

  test "non-leading digit" do
    for digit <- 0..9 do
      input = "a#{digit}b"
      assert CSSEscape.escape(input) == input
    end
  end

  test "leading hyphen and digit" do
    for {hex, digit} <- Enum.with_index(@digit_escapes) do
      assert CSSEscape.escape("-#{digit}a") == "-\\#{hex} a"
    end
  end

  test "hyphens" do
    # Only a lone hyphen is escaped.
    assert CSSEscape.escape("-") == "\\-"

    for input <- ["-a", "--", "--a"] do
      assert CSSEscape.escape(input) == input
    end
  end

  test "non-ASCII and special characters" do
    emoji = "🤷🏻‍♂️-_©"
    assert CSSEscape.escape(emoji) == emoji

    # U+0080..U+009F pass through unchanged, while U+007F is hex-escaped.
    c1_controls = for codepoint <- 0x80..0x9F, into: "", do: <<codepoint::utf8>>
    assert CSSEscape.escape(<<0x7F>> <> c1_controls) == "\\7F " <> c1_controls

    latin1 = "\u00a0\u00a1\u00a2"
    assert CSSEscape.escape(latin1) == latin1
  end

  test "alphanumeric characters" do
    lowercase = List.to_string(Enum.to_list(?a..?z))
    uppercase = List.to_string(Enum.to_list(?A..?Z))

    for input <- ["a0123456789b", lowercase, uppercase] do
      assert CSSEscape.escape(input) == input
    end
  end

  test "space and exclamation mark" do
    assert CSSEscape.escape(" !xy") == "\\ \\!xy"
  end

  test "unicode characters" do
    # astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
    tetragram = <<0x1D306::utf8>>
    assert CSSEscape.escape(tetragram) == tetragram
  end
end

0 commit comments

Comments
 (0)