Checksums and file padding - trouble understanding EOT spec

Question

Checksums and file padding - trouble understanding EOT spec

I expect few of you will know much about EOT files (I don't think anyone does), but you might perhaps know enough about file formats in general to shine some light on this problem.

I'm able to read an EOT font in Python just fine, but after modifying some of the header fields and recomputing the checksums the resulting file is rejected by Internet Explorer.

There are two checksums, one for the RootString field and another for the whole font file. The spec can be found here: https://www.w3.org/Submission/EOT/#Version22

When I parse an EOT file and recompute its ChecksumAdjustment I get a number that doesn't match the one in the file, but if I save the file with this different checksum, it still works. However, if I actually make any modification to the font, such as changing the font's name, it will no longer work.

If the checksums aren't the problem, I thought it might have to do with the padding fields, which I'm struggling to comprehend the purpose of. Apparently, the padding is to ensure ULONG (4 byte) alignment. Some of the header fields are of a custom length, so presumably the padding is used when a custom length field breaks the alignment. But then what's the purpose of the first padding field? There are no custom length fields preceding it and the padding occurs at a multiple of 4 bytes anyway so why is it needed? Furthermore, the file I have (which I know works) has every 2-byte padding field even where it breaks alignment.

Here is how I'm parsing the file:

def __init__(self, file_path):
    super().__init__(base.FontType.EOT)

    self._file_name = os.path.basename(file_path)

    with open(file_path, 'rb') as f:
        self.eot_size = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.font_data_size = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.version = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.flags = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.font_panose = struct.unpack('<10s', f.read(struct.calcsize('<10s')))[0]
        self.charset = struct.unpack('<s', f.read(struct.calcsize('<s')))[0]
        self.italic = struct.unpack('<s', f.read(struct.calcsize('<s')))[0]
        self.weight = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.fs_type = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.magic_number = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.unicode_range_1 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.unicode_range_2 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.unicode_range_3 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.unicode_range_4 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.code_page_range_1 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.code_page_range_2 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.checksum_adjustment = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.reserved1 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.reserved2 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.reserved3 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.reserved4 = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self._eat_padding(f)
        self.family_name_size = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.family_name = f.read(self.family_name_size).decode('utf-16')
        self._eat_padding(f)
        self.style_name_size = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.style_name = f.read(self.style_name_size).decode('utf-16')
        self._eat_padding(f)
        self.version_name_size = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.version_name = f.read(self.version_name_size).decode('utf-16')
        self._eat_padding(f)
        self.full_name_size = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.full_name = f.read(self.full_name_size).decode('utf-16')
        self._eat_padding(f)
        self.root_string_size = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.root_string = f.read(self.root_string_size).decode('utf-16')
        self.root_string_checksum = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.eudc_code_page = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self._eat_padding(f)
        self.signature_size = struct.unpack('<H', f.read(struct.calcsize('<H')))[0]
        self.signature = f.read(self.signature_size)
        self.eudc_flags = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.eudc_font_size = struct.unpack('<L', f.read(struct.calcsize('<L')))[0]
        self.eudc_font_data = f.read(self.eudc_font_size)
        self.font_data = f.read(self.font_data_size)

    _assert_equal(self.family_name_size, len(self.family_name.encode('utf-16-le')))
    _assert_equal(self.style_name_size, len(self.style_name.encode('utf-16-le')))
    _assert_equal(self.version_name_size, len(self.version_name.encode('utf-16-le')))
    _assert_equal(self.full_name_size, len(self.full_name.encode('utf-16-le')))
    _assert_equal(self.root_string_size, len(self.root_string.encode('utf-16-le')))


def _eat_padding(self, f):
    L = struct.calcsize('<L')
    H = struct.calcsize('<H')

#    f.read(H)
#    return

    excess = f.tell() % L
    if excess != 0:
        assert excess % H == 0, 'Font not USHORT aligned'
        bytes = f.read(L - excess)
        assert bytes == b'\x00' * (L - excess), 'Found non-zero padding bytes'

Here is how I export the file and compute the checksums:

def dump(self, f):
    self._compute_root_string_checksum()

    buf = io.BytesIO()

    buf.write(struct.pack('<L', self.eot_size))
    buf.write(struct.pack('<L', self.font_data_size))
    buf.write(struct.pack('<L', self.version))
    buf.write(struct.pack('<L', self.flags))
    buf.write(struct.pack('<10s', self.font_panose))
    buf.write(struct.pack('<s', self.charset))
    buf.write(struct.pack('<s', self.italic))
    buf.write(struct.pack('<L', self.weight))
    buf.write(struct.pack('<H', self.fs_type))
    buf.write(struct.pack('<H', self.magic_number))
    buf.write(struct.pack('<L', self.unicode_range_1))
    buf.write(struct.pack('<L', self.unicode_range_2))
    buf.write(struct.pack('<L', self.unicode_range_3))
    buf.write(struct.pack('<L', self.unicode_range_4))
    buf.write(struct.pack('<L', self.code_page_range_1))
    buf.write(struct.pack('<L', self.code_page_range_2))
    checksum_adj_pos = buf.tell()
    buf.write(struct.pack('<L', 0))
    buf.write(struct.pack('<L', self.reserved1))
    buf.write(struct.pack('<L', self.reserved2))
    buf.write(struct.pack('<L', self.reserved3))
    buf.write(struct.pack('<L', self.reserved4))
    self._add_padding(buf)
    buf.write(struct.pack('<H', self.family_name_size))
    buf.write(self.family_name.encode('utf-16-le'))
    self._add_padding(buf)
    buf.write(struct.pack('<H', self.style_name_size))
    buf.write(self.style_name.encode('utf-16-le'))
    self._add_padding(buf)
    buf.write(struct.pack('<H', self.version_name_size))
    buf.write(self.version_name.encode('utf-16-le'))
    self._add_padding(buf)
    buf.write(struct.pack('<H', self.full_name_size))
    buf.write(self.full_name.encode('utf-16-le'))
    self._add_padding(buf)
    buf.write(struct.pack('<H', self.root_string_size))
    buf.write(self.root_string.encode('utf-16-le'))
    buf.write(struct.pack('<L', self.root_string_checksum))
    buf.write(struct.pack('<L', self.eudc_code_page))
    self._add_padding(buf)
    buf.write(struct.pack('<H', self.signature_size))
    buf.write(self.signature)
    buf.write(struct.pack('<L', self.eudc_flags))
    buf.write(struct.pack('<L', self.eudc_font_size))
    buf.write(self.eudc_font_data)
    buf.write(self.font_data)

    total = 0
    buf.seek(0)
    while True:
        bytes = buf.read(4)

        if len(bytes) == 0:
            break

        assert len(bytes) % 4 == 0, 'Font not padded correctly'

        ulong = struct.unpack('<L', bytes)[0]
        total = (total + ulong) & 0xffffffff

    self.checksum_adjustment = (0xB1B0AFBA - total) & 0xffffffff

    buf.seek(checksum_adj_pos)
    buf.write(struct.pack('<L', self.checksum_adjustment))

    buf.seek(0)
    while True:
        bytes = buf.read(1)

        if len(bytes) == 0:
            break

        f.write(bytes)


def _compute_root_string_checksum(self):
    root_string = self.root_string.encode('utf-16-le')
    buf = io.BytesIO(root_string)

    total = 0
    while True:
        bytes = buf.read(4)

        if len(bytes) == 0:
            break

        assert len(bytes) % 4 == 0, 'Root String not padded correctly'

        ulong = struct.unpack('<L', bytes)[0]
        total = (total + ulong) & 0xffffffff

    self.root_string_checksum = total ^ 0x50475342

Any help is much appreciated. Thanks

fonts

padding

checksum

file-format

eot

asked on Stack Overflow Jan 12, 2016 by

RJinman

1 Answer

look here https://github.com/westmon/formhub/blob/af43646848106d06fbd5a82c90b9f0f6ab43613a/public/assets/plugins/ionicons/builder/scripts/eotlitetool.py

eot.checkSumAdjustment  = otf - > head -> offset + 12

answered on Stack Overflow Apr 17, 2020 by

quantw • edited Apr 19, 2020 by

Nicolai Schmid

User contributions licensed under CC BY-SA 3.0