Skip to content

Commit 1e5c165

Browse files
committed
move char/unified conversion into precompiled Util module
madness? THIS IS SPARTA
1 parent 27f76f4 commit 1e5c165

File tree

6 files changed

+121
-64
lines changed

6 files changed

+121
-64
lines changed

bench/exmoji_bench.exs

+5-5
Original file line number | Diff line number | Diff line change
@@ -14,11 +14,11 @@ defmodule ExmojiBench do
1414
bench "find_by_short_name - many", do: Exmoji.find_by_short_name("MOON")
1515
bench "find_by_short_name - none", do: Exmoji.find_by_short_name("zzzz")
1616

17-
bench "char_to_unified - single", do: Exmoji.char_to_unified("🚀")
18-
bench "char_to_unified - double", do: Exmoji.char_to_unified("\x{2601}\x{FE0F}")
17+
bench "char_to_unified - single", do: Exmoji.Util.char_to_unified("🚀")
18+
bench "char_to_unified - double", do: Exmoji.Util.char_to_unified("\x{2601}\x{FE0F}")
1919

20-
bench "unified_to_char - single", do: Exmoji.unified_to_char("1F47E")
21-
bench "unified_to_char - double", do: Exmoji.unified_to_char("2764-fe0f")
22-
bench "unified_to_char - triple", do: Exmoji.unified_to_char("0030-FE0F-20E3")
20+
bench "unified_to_char - single", do: Exmoji.Util.unified_to_char("1F47E")
21+
bench "unified_to_char - double", do: Exmoji.Util.unified_to_char("2764-fe0f")
22+
bench "unified_to_char - triple", do: Exmoji.Util.unified_to_char("0030-FE0F-20E3")
2323

2424
end

lib/exmoji.ex

-43
Original file line number | Diff line number | Diff line change
@@ -159,47 +159,4 @@ defmodule Exmoji do
159159

160160
defp _from_unified(_), do: nil
161161

162-
163-
@doc """
164-
Convert a unified ID directly to its bitstring glyph representation.
165-
166-
## Example
167-
168-
iex> Exmoji.unified_to_char("1F47E")
169-
"👾"
170-
171-
"""
172-
def unified_to_char(uid) do
173-
uid
174-
|> String.split("-")
175-
|> Enum.map( &(String.to_integer(&1, 16)) )
176-
|> List.to_string
177-
end
178-
179-
180-
@doc """
181-
Convert a native bitstring glyph to its unified codepoint ID.
182-
183-
This is a conversion operation, not a match, so it may produce unexpected
184-
results with different types of values.
185-
186-
## Example
187-
188-
iex> Exmoji.char_to_unified("👾")
189-
"1F47E"
190-
191-
"""
192-
def char_to_unified(char) do
193-
char
194-
|> String.codepoints
195-
|> Enum.map(&padded_hex_string/1)
196-
|> Enum.join("-")
197-
|> String.upcase
198-
end
199-
defp padded_hex_string(codepoint) do
200-
<< cp_int_value :: utf8 >> = codepoint
201-
cp_int_value |> Integer.to_string(16) |> String.rjust(4,?0)
202-
end
203-
204-
205162
end

lib/exmoji/emoji_char.ex

+4-4
Original file line number | Diff line number | Diff line change
@@ -44,12 +44,12 @@ defmodule Exmoji.EmojiChar do
4444
"""
4545
def render(ec, options \\ [variant_encoding: true])
4646
def render(ec, variant_encoding: false) do
47-
Exmoji.unified_to_char(ec.unified)
47+
Exmoji.Util.unified_to_char(ec.unified)
4848
end
4949
def render(ec, variant_encoding: true) do
5050
case variant?(ec) do
51-
true -> Exmoji.unified_to_char( variant(ec) )
52-
false -> Exmoji.unified_to_char( ec.unified )
51+
true -> Exmoji.Util.unified_to_char( variant(ec) )
52+
false -> Exmoji.Util.unified_to_char( ec.unified )
5353
end
5454
end
5555

@@ -66,7 +66,7 @@ defmodule Exmoji.EmojiChar do
6666
"""
6767
def chars(%EmojiChar{unified: uid, variations: variations}) do
6868
[uid] ++ variations
69-
|> Enum.map(&Exmoji.unified_to_char/1)
69+
|> Enum.map(&Exmoji.Util.unified_to_char/1)
7070
end
7171

7272
@doc """

lib/exmoji/scanner.ex

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ defmodule Exmoji.Scanner do
2020
"""
2121
def scan(str) do
2222
bscan(str)
23-
|> Enum.map(&Exmoji.char_to_unified/1)
23+
|> Enum.map(&Exmoji.Util.char_to_unified/1)
2424
|> Enum.map(&Exmoji.from_unified/1)
2525
end
2626

lib/exmoji/util.ex

+99
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
defmodule Exmoji.Util.Unified do
  @moduledoc false

  # Algorithmic conversion from a unified ID to its glyph.
  #
  # `uid` is one or more hexadecimal codepoints joined by "-", for example
  # "1F47E" or "0030-FE0F-20E3". Each segment is parsed with
  # `String.to_integer/2` in base 16, so upper- and lowercase hex digits are
  # both accepted.
  #
  # Returns the string built from those codepoints, in order. Raises if a
  # segment cannot be parsed as hexadecimal (an empty segment included).
  @spec _unified_to_char(String.t()) :: String.t()
  def _unified_to_char(uid) do
    uid
    |> String.split("-")
    |> Enum.map(&String.to_integer(&1, 16))
    |> List.to_string()
  end
end
12+
13+
defmodule Exmoji.Util.Char do
  @moduledoc false

  # Algorithmic conversion from a glyph to its unified ID.
  #
  # Splits `char` into its Unicode codepoints, renders each one as uppercase
  # hexadecimal left-padded with zeros to at least four digits, and joins the
  # pieces with "-". For example "👾" becomes "1F47E" and a two-codepoint
  # glyph becomes something like "0023-20E3". An empty string yields "".
  @spec _char_to_unified(String.t()) :: String.t()
  def _char_to_unified(char) do
    char
    |> String.codepoints()
    |> Enum.map_join("-", &padded_hex_string/1)
  end

  # Renders a single codepoint (given as a one-codepoint string) as hex,
  # zero-padded to a minimum width of four characters.
  defp padded_hex_string(codepoint) do
    # Assertive match: fails loudly if the piece is not one valid UTF-8 codepoint.
    <<cp_int_value::utf8>> = codepoint

    # Integer.to_string/2 already emits uppercase digits for base 16, so no
    # separate upcase pass is needed.
    cp_int_value
    |> Integer.to_string(16)
    |> String.pad_leading(4, "0")
  end
end
30+
31+
32+
defmodule Exmoji.Util do
  @moduledoc """
  Provides utility functions to convert between Unicode unified ID values and
  rendered Emoji glyphs in bitstring format.

  Pattern matched with precompiled values for all known Emoji character values
  for maximum speed, with fallbacks to algorithmic conversion.
  """

  alias Exmoji.EmojiChar
  alias Exmoji.Util.Unified
  alias Exmoji.Util.Char

  @doc """
  Convert a unified ID directly to its bitstring glyph representation.

  Precompiled only for the uppercase format of the hex ID; any other input
  falls through to the algorithmic conversion.

  ## Example

      iex> Exmoji.Util.unified_to_char("1F47E")
      "👾"

  """
  @spec unified_to_char(String.t()) :: String.t()
  # One clause per known emoji, keyed on its unified ID. The glyph is computed
  # while the module compiles and embedded in the clause body as a literal.
  for ec <- Exmoji.all() do
    uid = ec.unified

    def unified_to_char(unquote(uid)) do
      unquote(Unified._unified_to_char(uid))
    end
  end

  # Same treatment for the variant ID of every emoji that has variants.
  for ec <- Exmoji.all_with_variants() do
    variant_id = EmojiChar.variant(ec)

    def unified_to_char(unquote(variant_id)) do
      unquote(Unified._unified_to_char(variant_id))
    end
  end

  # Not a precompiled ID: convert algorithmically.
  def unified_to_char(uid), do: Unified._unified_to_char(uid)

  @doc """
  Convert a native bitstring glyph to its unified codepoint ID.

  ## Example

      iex> Exmoji.Util.char_to_unified("👾")
      "1F47E"

  """
  @spec char_to_unified(String.t()) :: String.t()
  # create pattern matches (variants must be first)
  for ec <- Exmoji.all_with_variants() do
    # NOTE(review): presumably the same value as EmojiChar.variant(ec) used for
    # unified_to_char/1 above - confirm against EmojiChar.
    variant = List.first(ec.variations)

    def char_to_unified(unquote(Unified._unified_to_char(variant))) do
      unquote(variant)
    end
  end

  for ec <- Exmoji.all() do
    def char_to_unified(unquote(Unified._unified_to_char(ec.unified))) do
      unquote(ec.unified)
    end
  end

  # if not found, fallback
  def char_to_unified(char), do: Char._char_to_unified(char)
end

test/exmoji_test.exs

+12-11
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
defmodule ExmojiTest do
22
use ExUnit.Case, async: true
33
doctest Exmoji
4+
doctest Exmoji.Util
45

56
# Define a number of known Emoji library characteristics.
67
# We should expect to get this many from our data file.
@@ -138,42 +139,42 @@ defmodule ExmojiTest do
138139
# #char_to_unified
139140
#
140141
test ".char_to_unified - converts normal emoji to unified codepoint" do
141-
assert Exmoji.char_to_unified("👾") == "1F47E"
142-
assert Exmoji.char_to_unified("🚀") == "1F680"
142+
assert Exmoji.Util.char_to_unified("👾") == "1F47E"
143+
assert Exmoji.Util.char_to_unified("🚀") == "1F680"
143144
end
144145

145146
test ".char_to_unified - converts double-byte emoji to proper codepoint" do
146-
assert Exmoji.char_to_unified("🇺🇸") == "1F1FA-1F1F8"
147+
assert Exmoji.Util.char_to_unified("🇺🇸") == "1F1FA-1F1F8"
147148
end
148149

149150
test ".char_to_unified - in doublebyte, adds padding to hex codes that are <4 chars" do
150-
assert Exmoji.char_to_unified("#⃣") == "0023-20E3"
151+
assert Exmoji.Util.char_to_unified("#⃣") == "0023-20E3"
151152
end
152153

153154
test ".char_to_unified - converts variant encoded emoji to variant unified codepoint" do
154-
assert Exmoji.char_to_unified("\x{2601}\x{FE0F}") == "2601-FE0F"
155+
assert Exmoji.Util.char_to_unified("\x{2601}\x{FE0F}") == "2601-FE0F"
155156
end
156157

157158

158159
#
159160
# #unified_to_char
160161
#
161162
test ".unified_to_char - converts normal unified codepoints to unicode strings" do
162-
assert Exmoji.unified_to_char("1F47E") == "👾"
163-
assert Exmoji.unified_to_char("1F680") == "🚀"
163+
assert Exmoji.Util.unified_to_char("1F47E") == "👾"
164+
assert Exmoji.Util.unified_to_char("1F680") == "🚀"
164165
end
165166

166167
test ".unified_to_char - converts doublebyte unified codepoints to unicode strings" do
167-
assert Exmoji.unified_to_char("1F1FA-1F1F8") == "🇺🇸"
168-
assert Exmoji.unified_to_char("0023-20E3") == "#⃣"
168+
assert Exmoji.Util.unified_to_char("1F1FA-1F1F8") == "🇺🇸"
169+
assert Exmoji.Util.unified_to_char("0023-20E3") == "#⃣"
169170
end
170171

171172
test ".unified_to_char - converts variant unified codepoints to unicode strings" do
172-
assert Exmoji.unified_to_char("2764-fe0f") == "\x{2764}\x{FE0F}"
173+
assert Exmoji.Util.unified_to_char("2764-fe0f") == "\x{2764}\x{FE0F}"
173174
end
174175

175176
test ".unified_to_char - converts variant+doublebyte chars (triplets!) to unicode strings" do
176-
assert Exmoji.unified_to_char("0030-FE0F-20E3") == "\x{0030}\x{FE0F}\x{20E3}"
177+
assert Exmoji.Util.unified_to_char("0030-FE0F-20E3") == "\x{0030}\x{FE0F}\x{20E3}"
177178
end
178179

179180
end

0 commit comments

Comments
 (0)