Skip to content

Move char/unified conversion into precompiled Util module #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions bench/exmoji_bench.exs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ defmodule ExmojiBench do
bench "find_by_short_name - many", do: Exmoji.find_by_short_name("MOON")
bench "find_by_short_name - none", do: Exmoji.find_by_short_name("zzzz")

bench "char_to_unified - single", do: Exmoji.char_to_unified("🚀")
bench "char_to_unified - double", do: Exmoji.char_to_unified("\x{2601}\x{FE0F}")
bench "char_to_unified - single", do: Exmoji.Util.char_to_unified("🚀")
bench "char_to_unified - double", do: Exmoji.Util.char_to_unified("\x{2601}\x{FE0F}")

bench "unified_to_char - single", do: Exmoji.unified_to_char("1F47E")
bench "unified_to_char - double", do: Exmoji.unified_to_char("2764-fe0f")
bench "unified_to_char - triple", do: Exmoji.unified_to_char("0030-FE0F-20E3")
bench "unified_to_char - single", do: Exmoji.Util.unified_to_char("1F47E")
bench "unified_to_char - double", do: Exmoji.Util.unified_to_char("2764-fe0f")
bench "unified_to_char - triple", do: Exmoji.Util.unified_to_char("0030-FE0F-20E3")

end
47 changes: 0 additions & 47 deletions lib/exmoji.ex
Original file line number Diff line number Diff line change
Expand Up @@ -153,51 +153,4 @@ defmodule Exmoji do
end
defp _from_unified(_), do: nil


@doc """
Convert a unified ID directly to its bitstring glyph representation.

## Example

iex> Exmoji.unified_to_char("1F47E")
"👾"

"""
def unified_to_char(uid) do
uid
|> String.split("-")
|> Enum.map( &(String.to_integer(&1, 16)) )
|> List.to_string
end


@doc """
Convert a native bitstring glyph to its unified codepoint ID.

This is a conversion operation, not a match, so it may produce unexpected
results with different types of values.

## Examples

iex> Exmoji.char_to_unified("👾")
"1F47E"

iex> Exmoji.char_to_unified("\x{23}\x{fe0f}\x{20e3}")
"0023-FE0F-20E3"

"""
def char_to_unified(char) do
char
|> String.codepoints
|> Enum.map(&padded_hex_string/1)
|> Enum.join("-")
|> String.upcase
end
# produce a string representation of the integer value of a codepoint, in hex
# this should be zero-padded to a minimum of 4 digits
defp padded_hex_string(<< cp_int_value :: utf8 >>) do
cp_int_value |> Integer.to_string(16) |> String.rjust(4,?0)
end


end
8 changes: 4 additions & 4 deletions lib/exmoji/emoji_char.ex
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ defmodule Exmoji.EmojiChar do
"""
def render(ec, options \\ [variant_encoding: true])
def render(ec, variant_encoding: false) do
Exmoji.unified_to_char(ec.unified)
Exmoji.Util.unified_to_char(ec.unified)
end
def render(ec, variant_encoding: true) do
case variant?(ec) do
true -> Exmoji.unified_to_char( variant(ec) )
false -> Exmoji.unified_to_char( ec.unified )
true -> Exmoji.Util.unified_to_char( variant(ec) )
false -> Exmoji.Util.unified_to_char( ec.unified )
end
end

Expand All @@ -66,7 +66,7 @@ defmodule Exmoji.EmojiChar do
"""
def chars(%EmojiChar{}=emojichar) do
codepoint_ids(emojichar)
|> Enum.map(&Exmoji.unified_to_char/1)
|> Enum.map(&Exmoji.Util.unified_to_char/1)
end

@doc """
Expand Down
2 changes: 1 addition & 1 deletion lib/exmoji/scanner.ex
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ defmodule Exmoji.Scanner do
"""
def scan(str) do
bscan(str)
|> Enum.map(&Exmoji.char_to_unified/1)
|> Enum.map(&Exmoji.Util.char_to_unified/1)
|> Enum.map(&Exmoji.from_unified/1)
end

Expand Down
92 changes: 92 additions & 0 deletions lib/exmoji/util.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
defmodule Exmoji.Util.Unified do
  @moduledoc false

  # Reference implementation of the unified ID -> glyph conversion.
  #
  # `Exmoji.Util` evaluates it at compile time to build its precompiled
  # clauses, and calls it at runtime for any ID without a precompiled clause.
  #
  # `uid` is a string of hexadecimal codepoints separated by "-"
  # (e.g. "0030-FE0F-20E3"); hex digits may be upper or lower case.
  # Returns the UTF-8 string made of those codepoints, in order.
  def _unified_to_char(uid) do
    codepoints =
      for hex <- String.split(uid, "-") do
        String.to_integer(hex, 16)
      end

    List.to_string(codepoints)
  end

end

defmodule Exmoji.Util.Char do
  @moduledoc false

  # Reference implementation of the glyph -> unified ID conversion.
  #
  # `Exmoji.Util` calls it as the fallback for strings that have no
  # precompiled clause.
  #
  # Splits `char` into its codepoints, renders each one as uppercase hex
  # zero-padded to at least 4 digits, and joins the pieces with "-".
  # An empty string yields "".
  def _char_to_unified(char) do
    char
    |> String.codepoints
    |> Enum.map_join("-", &padded_hex_string/1)
  end

  # produce a string representation of the integer value of a codepoint, in hex
  # this should be zero-padded to a minimum of 4 digits
  #
  # `Integer.to_string/2` already emits uppercase digits for base 16, so no
  # separate upcase pass is needed.
  #
  # The padding is built with `String.duplicate/2` instead of the deprecated
  # `String.rjust/3`: this avoids the deprecation warning without requiring
  # the newer `String.pad_leading/3`.
  defp padded_hex_string(<< cp_int_value :: utf8 >>) do
    hex = Integer.to_string(cp_int_value, 16)
    String.duplicate("0", max(4 - byte_size(hex), 0)) <> hex
  end

end


defmodule Exmoji.Util do
  @moduledoc """
  Conversion helpers between Unicode unified ID values and rendered Emoji
  glyphs in bitstring format.

  Each known Emoji character value gets its own precompiled, pattern-matched
  function clause for maximum speed; any other input falls back to
  algorithmic conversion.
  """

  alias Exmoji.EmojiChar
  alias Exmoji.Util.Unified
  alias Exmoji.Util.Char

  @doc """
  Convert a unified ID directly to its bitstring glyph representation.

  Precompiled only for uppercase format of the hex ID.

  ## Example

      iex> Exmoji.Util.unified_to_char("1F47E")
      "👾"

  """
  # one literal-matching clause per known codepoint ID, with the glyph computed
  # once at compile time
  for emoji <- Exmoji.all, id <- EmojiChar.codepoint_ids(emoji) do
    glyph = Unified._unified_to_char(id)
    def unified_to_char(unquote(id)), do: unquote(glyph)
  end

  # no precompiled clause matched: convert algorithmically
  def unified_to_char(uid) do
    Unified._unified_to_char(uid)
  end


  @doc """
  Convert a native bitstring glyph to its unified codepoint ID.

  ## Examples

      iex> Exmoji.Util.char_to_unified("👾")
      "1F47E"

      iex> Exmoji.Util.char_to_unified("\x{23}\x{fe0f}\x{20e3}")
      "0023-FE0F-20E3"

  """
  # inverse table: the compile-time glyph is the pattern, the ID is the result
  for emoji <- Exmoji.all, id <- EmojiChar.codepoint_ids(emoji) do
    glyph = Unified._unified_to_char(id)
    def char_to_unified(unquote(glyph)), do: unquote(id)
  end

  # no precompiled clause matched: convert algorithmically
  # (despite its name, `uid` holds the glyph string here)
  def char_to_unified(uid) do
    Char._char_to_unified(uid)
  end

end
23 changes: 12 additions & 11 deletions test/exmoji_test.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
defmodule ExmojiTest do
use ExUnit.Case, async: true
doctest Exmoji
doctest Exmoji.Util

# Define a number of known Emoji library characteristics.
# We should expect to get this many from our data file.
Expand Down Expand Up @@ -143,42 +144,42 @@ defmodule ExmojiTest do
# #char_to_unified
#
test ".char_to_unified - converts normal emoji to unified codepoint" do
assert Exmoji.char_to_unified("👾") == "1F47E"
assert Exmoji.char_to_unified("🚀") == "1F680"
assert Exmoji.Util.char_to_unified("👾") == "1F47E"
assert Exmoji.Util.char_to_unified("🚀") == "1F680"
end

test ".char_to_unified - converts double-byte emoji to proper codepoint" do
assert Exmoji.char_to_unified("🇺🇸") == "1F1FA-1F1F8"
assert Exmoji.Util.char_to_unified("🇺🇸") == "1F1FA-1F1F8"
end

test ".char_to_unified - in doublebyte, adds padding to hex codes that are <4 chars" do
assert Exmoji.char_to_unified("#⃣") == "0023-20E3"
assert Exmoji.Util.char_to_unified("#⃣") == "0023-20E3"
end

test ".char_to_unified - converts variant encoded emoji to variant unified codepoint" do
assert Exmoji.char_to_unified("\x{2601}\x{FE0F}") == "2601-FE0F"
assert Exmoji.Util.char_to_unified("\x{2601}\x{FE0F}") == "2601-FE0F"
end


#
# #unified_to_char
#
test ".unified_to_char - converts normal unified codepoints to unicode strings" do
assert Exmoji.unified_to_char("1F47E") == "👾"
assert Exmoji.unified_to_char("1F680") == "🚀"
assert Exmoji.Util.unified_to_char("1F47E") == "👾"
assert Exmoji.Util.unified_to_char("1F680") == "🚀"
end

test ".unified_to_char - converts doublebyte unified codepoints to unicode strings" do
assert Exmoji.unified_to_char("1F1FA-1F1F8") == "🇺🇸"
assert Exmoji.unified_to_char("0023-20E3") == "#⃣"
assert Exmoji.Util.unified_to_char("1F1FA-1F1F8") == "🇺🇸"
assert Exmoji.Util.unified_to_char("0023-20E3") == "#⃣"
end

test ".unified_to_char - converts variant unified codepoints to unicode strings" do
assert Exmoji.unified_to_char("2764-fe0f") == "\x{2764}\x{FE0F}"
assert Exmoji.Util.unified_to_char("2764-fe0f") == "\x{2764}\x{FE0F}"
end

test ".unified_to_char - converts variant+doublebyte chars (triplets!) to unicode strings" do
assert Exmoji.unified_to_char("0030-FE0F-20E3") == "\x{0030}\x{FE0F}\x{20E3}"
assert Exmoji.Util.unified_to_char("0030-FE0F-20E3") == "\x{0030}\x{FE0F}\x{20E3}"
end

end