Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

name-parser: Remove filename parsing code and more #1259

Merged
merged 6 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions bin/scripts/name_parser/FontnameParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
class FontnameParser:
"""Parse a font name and generate all kinds of names"""

def __init__(self, filename, logger):
"""Parse a font filename and store the results"""
def __init__(self, fontname, logger):
"""Parse a fontname and store the results"""
self.parse_ok = False
self.use_short_families = (False, False, False) # ( camelcase name, short styles, aggressive )
self.keep_regular_in_family = None # None = auto, True, False
Expand All @@ -17,7 +17,7 @@ def __init__(self, filename, logger):
self.ps_fontname_suff = ''
self.short_family_suff = ''
self.name_subst = []
[ self.parse_ok, self._basename, self.weight_token, self.style_token, self.other_token, self._rest ] = FontnameTools.parse_font_name(filename)
[ self.parse_ok, self._basename, self.weight_token, self.style_token, self.other_token, self._rest ] = FontnameTools.parse_font_name(fontname)
self.basename = self._basename
self.rest = self._rest
self.add_name_substitution_table(FontnameTools.SIL_TABLE)
Expand Down
56 changes: 23 additions & 33 deletions bin/scripts/name_parser/FontnameTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys

class FontnameTools:
"""Deconstruct a font filename to get standardized name parts"""
"""Deconstruct a fontname to get standardized name parts"""

@staticmethod
def front_upper(word):
Expand Down Expand Up @@ -64,24 +64,11 @@ def unify_style_names(style_name):
known_names = {
# Source of the table is the current sourcefonts
# Left side needs to be lower case
'-': '',
'book': '',
'text': '',
'ce': 'CE',
#'semibold': 'Demi',
'ob': 'Oblique',
'it': 'Italic',
'i': 'Italic',
'b': 'Bold',
'normal': 'Regular',
'c': 'Condensed',
'r': 'Regular',
'm': 'Medium',
'l': 'Light',
}
if style_name in known_names:
return known_names[style_name.lower()]
return style_name
return known_names.get(style_name.lower(), style_name)

@staticmethod
def find_in_dicts(key, dicts):
Expand Down Expand Up @@ -146,7 +133,7 @@ def make_oblique_style(weights, styles):
return (weights, styles)

@staticmethod
def get_name_token(name, tokens, allow_regex_token = False):
def get_name_token(name, tokens):
"""Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest"""
# The default mode (allow_regex_token = False) will try to find any verbatim string in the
# tokens list (case insensitive matching) and give that tokens list item back with
Expand All @@ -160,7 +147,11 @@ def get_name_token(name, tokens, allow_regex_token = False):
not_matched = ""
all_tokens = []
j = 1
regex = re.compile('(.*?)(' + '|'.join(tokens) + ')(.*)', re.IGNORECASE)
token_regex = '|'.join(tokens)
# Allow a dash between CamelCase token word parts, i.e. Camel-Case
# This allows for styles like Extra-Bold
token_regex = re.sub(r'(?<=[a-z])(?=[A-Z])', '-?', token_regex)
regex = re.compile('(.*?)(' + token_regex + ')(.*)', re.IGNORECASE)
while j:
j = regex.match(name)
if not j:
Expand All @@ -169,6 +160,7 @@ def get_name_token(name, tokens, allow_regex_token = False):
sys.exit('Malformed regex in FontnameTools.get_name_token()')
not_matched += ' ' + j.groups()[0] # Blanc prevents unwanted concatenation of unmatched substrings
tok = j.groups()[1].lower()
tok = tok.replace('-', '') # Remove dashes between CamelCase token words
if tok in lower_tokens:
tok = tokens[lower_tokens.index(tok)]
tok = FontnameTools.unify_style_names(tok)
Expand Down Expand Up @@ -238,6 +230,7 @@ def postscript_char_filter(name):
'Medium': ('Md', 'Med'),
'Nord': ('Nd', 'Nord'),
'Book': ('Bk', 'Book'),
'Text': ('Txt', 'Text'),
'Poster': ('Po', 'Poster'),
'Demi': ('Dm', 'Demi'), # Demi is sometimes used as a weight, sometimes as a modifier
'Regular': ('Rg', 'Reg'),
Expand Down Expand Up @@ -306,8 +299,9 @@ def is_keep_regular(basename):

@staticmethod
def _parse_simple_font_name(name):
"""Parse a filename that does not follow the 'FontFamilyName-FontStyle' pattern"""
# No dash in name, maybe we have blanc separated filename?
"""Parse a fontname that does not follow the 'FontFamilyName-FontStyle' pattern"""
# This is the usual case, because the font-patcher usually uses the fullname and
# not the PS name
if ' ' in name:
return FontnameTools.parse_font_name(name.replace(' ', '-'))
# Do we have a number-name boundary?
Expand All @@ -322,8 +316,15 @@ def _parse_simple_font_name(name):

@staticmethod
def parse_font_name(name):
"""Expects a filename following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""
name = re.sub(r'\bsemi-condensed\b', 'SemiCondensed', name, 1, re.IGNORECASE) # Just for "3270 Semi-Condensed" :-/
"""Expects a fontname following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""
# This could parse filenames in the beginning but that was never used in production; code removed with this commit
for special in [
('ExtLt', 'ExtraLight'), # IBM-Plex
('Medm', 'Medium'), # IBM-Plex
('Semi-Condensed', 'SemiCondensed'), # 3270
('SmBld', 'SemiBold'), # IBM-Plex
]:
name = re.sub(r'\b' + special[0] + r'\b', special[1], name, 1, re.IGNORECASE)
name = re.sub('[_\s]+', ' ', name)
matches = re.match(r'([^-]+)(?:-(.*))?', name)
familyname = FontnameTools.camel_casify(matches.group(1))
Expand All @@ -345,27 +346,16 @@ def parse_font_name(name):
# Some font specialities:
other = [
'-', 'Book', 'For', 'Powerline',
'Text', # Plex
'IIx', # Profont IIx
'LGC', # Inconsolata LGC
r'\bCE\b', # ProggycleanTT CE
r'[12][cmp]n?', # MPlus
r'(?:uni-)?1[14]', # GohuFont uni
]

# Sometimes used abbreviations
weight_abbrevs = [ 'ob', 'c', 'm', 'l', ]
style_abbrevs = [ 'it', 'r', 'b', 'i', ]

( style, weight_token ) = FontnameTools.get_name_token(style, weights)
( style, style_token ) = FontnameTools.get_name_token(style, styles)
( style, other_token ) = FontnameTools.get_name_token(style, other, True)
if (len(style) < 4
and style.lower() != 'pro'): # Prevent 'r' of Pro to be detected as style_abbrev
( style, weight_token_abbrevs ) = FontnameTools.get_name_token(style, weight_abbrevs)
( style, style_token_abbrevs ) = FontnameTools.get_name_token(style, style_abbrevs)
weight_token += weight_token_abbrevs
style_token += style_token_abbrevs
( style, other_token ) = FontnameTools.get_name_token(style, other)
while 'Regular' in style_token and len(style_token) > 1:
# Correct situation where "Regular" and something else is given
style_token.remove('Regular')
Expand Down
2 changes: 1 addition & 1 deletion font-patcher
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from __future__ import absolute_import, print_function, unicode_literals

# Change the script version when you edit this script:
script_version = "4.3.2"
script_version = "4.3.3"

version = "3.0.1"
projectName = "Nerd Fonts"
Expand Down