From 1fe9e3b4ce243dd52b3df39ddffbe0304cc30cde Mon Sep 17 00:00:00 2001 From: Colin Rofls Date: Fri, 4 Oct 2024 14:57:37 -0400 Subject: [PATCH] Strip BOM from glif xml if present --- src/glyph/parse.rs | 5 +++++ src/glyph/tests.rs | 7 +++++++ testdata/bom_glif.glif | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 testdata/bom_glif.glif diff --git a/src/glyph/parse.rs b/src/glyph/parse.rs index c328932b..ee801ab2 100644 --- a/src/glyph/parse.rs +++ b/src/glyph/parse.rs @@ -23,6 +23,9 @@ type Version = (u32, u32); const VERSION_1: Version = (1, 0); const VERSION_2: Version = (2, 0); +// https://en.wikipedia.org/wiki/Byte_order_mark +const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; + pub(crate) struct GlifParser<'names> { glyph: Glyph, version: Version, @@ -36,6 +39,8 @@ impl<'names> GlifParser<'names> { xml: &[u8], names: Option<&'names NameList>, ) -> Result { + // optional but allowed for utf-8. + let xml = xml.strip_prefix(UTF8_BOM).unwrap_or(xml); let mut reader = Reader::from_reader(xml); let mut buf = Vec::new(); reader.trim_text(true); diff --git a/src/glyph/tests.rs b/src/glyph/tests.rs index ca36e2e7..ef7cce42 100644 --- a/src/glyph/tests.rs +++ b/src/glyph/tests.rs @@ -888,3 +888,10 @@ fn deduplicate_unicodes2() { "#; assert_eq!(data2, data2_expected); } + +#[test] +fn bom_glif() { + let bytes = include_bytes!("../../testdata/bom_glif.glif"); + let glyph = parse_glyph(bytes).expect("initial load failed"); + assert_eq!(glyph.lib.get("hi").unwrap().as_string(), Some("hello")); +} diff --git a/testdata/bom_glif.glif b/testdata/bom_glif.glif new file mode 100644 index 00000000..73c05dc0 --- /dev/null +++ b/testdata/bom_glif.glif @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + hi + hello + + +