fonttools · yanone · May 15, 2024 · May 15, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,8 @@ A more detailed list of changes is available in the corresponding milestones for
 ##  Upcoming release: 0.13.0 (2024-Sep-??)
 ### Changes to existing checks
 #### On the Universal profile
+  - **[case_mapping]:** Dynamically exclude Greek characters when the font's Greek coverage is incomplete (PR #4721)
+  - **[missing_small_caps_glyphs]:** Rewrote it from scratch, marked it as **experimental** (issue #4713)
   - **[name/family_and_style_max_length"]:** Use nameID 16 (Typographic family name) to determine name length if it exists. (PR #4811)
 
 ### Migration of checks

diff --git a/Lib/fontbakery/checks/glyphset.py b/Lib/fontbakery/checks/glyphset.py
@@ -1,10 +1,13 @@
+import unicodedata
+from vharfbuzz import Vharfbuzz
+
 from fontbakery.constants import (
     NameID,
     PlatformID,
     WindowsEncodingID,
     WindowsLanguageID,
 )
-from fontbakery.prelude import check, Message, FAIL, WARN, PASS
+from fontbakery.prelude import check, Message, FAIL, WARN, SKIP, PASS
 from fontbakery.utils import bullet_list, glyph_has_ink
 
 
@@ -20,20 +23,15 @@
 )
 def check_case_mapping(ttFont):
     """Ensure the font supports case swapping for all its glyphs."""
-    import unicodedata
-    from fontbakery.utils import markdown_table
+    from fontbakery.utils import markdown_table, characters_per_script
 
     # These are a selection of codepoints for which the corresponding case-swap
     # glyphs are missing way too often on the Google Fonts library,
     # so we'll ignore for now:
     EXCEPTIONS = [
         0x0192,  # ƒ - Latin Small Letter F with Hook
         0x00B5,  # µ - Micro Sign
-        0x03C0,  # π - Greek Small Letter Pi
         0x2126,  # Ω - Ohm Sign
-        0x03BC,  # μ - Greek Small Letter Mu
-        0x03A9,  # Ω - Greek Capital Letter Omega
-        0x0394,  # Δ - Greek Capital Letter Delta
         0x0251,  # ɑ - Latin Small Letter Alpha
         0x0261,  # ɡ - Latin Small Letter Script G
         0x00FF,  # ÿ - Latin Small Letter Y with Diaeresis
@@ -51,6 +49,13 @@ def check_case_mapping(ttFont):
         0x026B,  # ɫ - Latin Small Letter L with Middle Tilde
     ]
 
+    # If the font has only partial (legacy) Greek coverage, ignore Greek
+    # dynamically. Minimal Greek coverage is 2x24=48 characters; we treat it
+    # as incomplete when fewer than half of those (24) are present.
+    greek = characters_per_script(ttFont, "Greek")
+    if 0 < len(greek) < 24:
+        EXCEPTIONS.extend(greek)
+
     missing_counterparts_table = []
     cmap = ttFont["cmap"].getBestCmap()
     for codepoint in cmap:
@@ -222,51 +227,98 @@ def check_mandatory_glyphs(ttFont):
     rationale="""
         Ensure small caps glyphs are available if
         a font declares smcp or c2sc OT features.
+
+        If you believe that a certain character should not
+        be reported as missing, please add it to the
+        `exceptions_smcp` or `exceptions_c2sc` lists.
     """,
     proposal="https://github.com/fonttools/fontbakery/issues/3154",
+    experimental="Since 2024/May/15",
 )
 def check_missing_small_caps_glyphs(ttFont):
     """Ensure small caps glyphs are available."""
+    from fontbakery.utils import has_feature, characters_per_script
+
+    has_smcp = has_feature(ttFont, "smcp")
+    has_c2sc = has_feature(ttFont, "c2sc")
+
+    if not has_smcp and not has_c2sc:
+        yield SKIP, "Neither smcp nor c2sc features are declared in the font."
+        return
+
+    vhb = Vharfbuzz(ttFont.reader.file.name)
+    cmap = ttFont.getBestCmap()
+
+    missing_smcp = []
+    missing_c2sc = []
+
+    exceptions_smcp = [
+        0x0192,  # florin
+        0x00B5,  # micro (common, not Greek)
+        0x2113,  # liter sign
+        0xA78C,  # saltillo
+        0x1FBE,  # Greek prosgegrammeni
+    ]
+    exceptions_c2sc = [
+        0xA78B,  # Saltillo
+        0x2126,  # Ohm (not Omega)
+    ]
+
+    # If the font has only partial (legacy) Greek coverage, ignore Greek
+    # dynamically. Minimal Greek coverage is 2x24=48 characters; we treat it
+    # as incomplete when fewer than half of those (24) are present.
+    if 0 < len(characters_per_script(ttFont, "Greek")) < 24:
+        exceptions_smcp.extend(characters_per_script(ttFont, "Greek", "Ll"))
+        exceptions_c2sc.extend(characters_per_script(ttFont, "Greek", "Lu"))
 
-    if "GSUB" in ttFont and ttFont["GSUB"].table.FeatureList is not None:
-        llist = ttFont["GSUB"].table.LookupList
-        for record in range(ttFont["GSUB"].table.FeatureList.FeatureCount):
-            feature = ttFont["GSUB"].table.FeatureList.FeatureRecord[record]
-            tag = feature.FeatureTag
-            if tag in ["smcp", "c2sc"]:
-                for index in feature.Feature.LookupListIndex:
-                    subtable = llist.Lookup[index].SubTable[0]
-                    if subtable.LookupType == 7:
-                        # This is an Extension lookup
-                        # used for reaching 32-bit offsets
-                        # within the GSUB table.
-                        subtable = subtable.ExtSubTable
-                    if not hasattr(subtable, "mapping"):
-                        continue
-                    smcp_glyphs = set()
-                    for value in subtable.mapping.values():
-                        if isinstance(value, list):
-                            for v in value:
-                                smcp_glyphs.add(v)
-                        else:
-                            smcp_glyphs.add(value)
-                    missing = smcp_glyphs - set(ttFont.getGlyphNames())
-                    if missing:
-                        missing = "\n\t - " + "\n\t - ".join(missing)
-                        yield FAIL, Message(
-                            "missing-glyphs",
-                            f"These '{tag}' glyphs are missing:\n\n{missing}",
-                        )
-                break
+    for codepoint in cmap:
+        char = chr(codepoint)
+
+        if (
+            has_smcp
+            and unicodedata.category(char) == "Ll"
+            and codepoint not in exceptions_smcp
+        ):
+            if vhb.serialize_buf(vhb.shape(char)) == vhb.serialize_buf(
+                vhb.shape(char, {"features": {"smcp": True}})
+            ):
+                missing_smcp.append(char)
+        if (
+            has_c2sc
+            and unicodedata.category(char) == "Lu"
+            and codepoint not in exceptions_c2sc
+        ):
+            if vhb.serialize_buf(vhb.shape(char)) == vhb.serialize_buf(
+                vhb.shape(char, {"features": {"c2sc": True}})
+            ):
+                missing_c2sc.append(char)
+
+    if missing_smcp:
+        missing_smcp = "\n\t - " + "\n\t - ".join(
+            [f"U+{ord(x):04X}: {unicodedata.name(x)}" for x in missing_smcp]
+        )
+        yield FAIL, Message(
+            "missing-smcp",
+            "'smcp' substitution target glyphs for these"
+            f" characters are missing:\n\n{missing_smcp}",
+        )
+
+    if missing_c2sc:
+        missing_c2sc = "\n\t - " + "\n\t - ".join(
+            [f"U+{ord(x):04X}: {unicodedata.name(x)}" for x in missing_c2sc]
+        )
+        yield FAIL, Message(
+            "missing-c2sc",
+            "'c2sc' substitution target glyphs for these"
+            f" characters are missing:\n\n{missing_c2sc}",
+        )
 
 
 def can_shape(ttFont, text, parameters=None):
     """
     Returns true if the font can render a text string without any
     .notdef characters.
     """
-    from vharfbuzz import Vharfbuzz
-
     filename = ttFont.reader.file.name
     vharfbuzz = Vharfbuzz(filename)
     buf = vharfbuzz.shape(text, parameters)

diff --git a/Lib/fontbakery/checks/tabular_glyphs.py b/Lib/fontbakery/checks/tabular_glyphs.py
@@ -145,6 +145,7 @@ def check_tabular_kerning(ttFont):
     from vharfbuzz import Vharfbuzz
     import uharfbuzz as hb
     import unicodedata
+    from fontbakery.utils import has_feature
 
     EXCLUDE = [
         "\u0600",  # Arabic
@@ -194,17 +195,6 @@ def unique_combinations(list_1, list_2):
 
         return unique_combinations
 
-    def has_feature(ttFont, featureTag):
-        if "GSUB" in ttFont and ttFont["GSUB"].table.FeatureList:
-            for FeatureRecord in ttFont["GSUB"].table.FeatureList.FeatureRecord:
-                if FeatureRecord.FeatureTag == featureTag:
-                    return True
-        if "GPOS" in ttFont and ttFont["GPOS"].table.FeatureList:
-            for FeatureRecord in ttFont["GPOS"].table.FeatureList.FeatureRecord:
-                if FeatureRecord.FeatureTag == featureTag:
-                    return True
-        return False
-
     def buf_to_width(buf):
         x_cursor = 0
 

diff --git a/Lib/fontbakery/utils.py b/Lib/fontbakery/utils.py
@@ -726,3 +726,30 @@ def image_dimensions(filename):
 
     else:
         return None  # some other file format
+
+
+def has_feature(ttFont, featureTag):
+    """Return True if featureTag is listed in the font's GSUB or GPOS features."""
+    if "GSUB" in ttFont and ttFont["GSUB"].table.FeatureList:
+        for FeatureRecord in ttFont["GSUB"].table.FeatureList.FeatureRecord:
+            if FeatureRecord.FeatureTag == featureTag:
+                return True
+    if "GPOS" in ttFont and ttFont["GPOS"].table.FeatureList:
+        for FeatureRecord in ttFont["GPOS"].table.FeatureList.FeatureRecord:
+            if FeatureRecord.FeatureTag == featureTag:
+                return True
+    return False  # tag found in neither table
+
+
+def characters_per_script(ttFont, target_script, target_category=None):
+    """Return the font's best-cmap codepoints of a script, optionally by category."""
+    from unicodedataplus import script, category  # type: ignore
+
+    characters = []
+    for codepoint in ttFont.getBestCmap().keys():
+        if script(chr(codepoint)) == target_script and (
+            not target_category or category(chr(codepoint)) == target_category
+        ):
+            characters.append(codepoint)
+
+    return characters  # list of int codepoints, in cmap iteration order
diff --git a/data/test/cormorantunicase/CormorantUnicase-Bold.ttf b/data/test/cormorantunicase/CormorantUnicase-Bold.ttf
diff --git a/pyproject.toml b/pyproject.toml
@@ -48,6 +48,7 @@ dependencies = [
     "ufolint",
     "ufo2ft >= 2.25.2",  # script lists for Unicode 14.0 were updated on v2.25.2
     "uharfbuzz",
+    "unicodedataplus",
     "vharfbuzz >= 0.2.0",
 ]
 

diff --git a/tests/test_checks_universal.py b/tests/test_checks_universal.py
@@ -1423,3 +1423,17 @@ def test_check_gsub_smallcaps_before_ligatures():
     smcp_feature.LookupListIndex = [1]
     liga_feature.LookupListIndex = [0]
     assert_results_contain(check(ttFont), FAIL, "feature-ordering")
+
+
+def test_check_missing_small_caps_glyphs():
+    """Check missing_small_caps_glyphs against one passing and two failing fonts."""
+    check = CheckTester("missing_small_caps_glyphs")
+
+    ttFont = TTFont(TEST_FILE("cormorantunicase/CormorantUnicase-Bold.ttf"))
+    assert_PASS(check(ttFont))
+
+    ttFont = TTFont(TEST_FILE("varfont/Georama[wdth,wght].ttf"))
+    assert_results_contain(check(ttFont), FAIL, "missing-smcp")
+
+    ttFont = TTFont(TEST_FILE("ubuntusans/UbuntuSans[wdth,wght].ttf"))
+    assert_results_contain(check(ttFont), FAIL, "missing-c2sc")