Skip to content

bpo-34990: Change pyc headers to use 64-bit timestamps #19651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Lib/compileall.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if not force:
try:
mtime = int(os.stat(fullname).st_mtime)
expect = struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
expect = struct.pack('<4slq', importlib.util.MAGIC_NUMBER,
0, mtime)
for cfile in opt_cfiles.values():
with open(cfile, 'rb') as chandle:
actual = chandle.read(12)
actual = chandle.read(16)
if expect != actual:
break
else:
Expand Down
1 change: 1 addition & 0 deletions Lib/importlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

# To simplify imports in test code
_pack_uint32 = _bootstrap_external._pack_uint32
_pack_uint64 = _bootstrap_external._pack_uint64
_unpack_uint32 = _bootstrap_external._unpack_uint32

# Fully bootstrapped at this point, import whatever you like, circular
Expand Down
31 changes: 21 additions & 10 deletions Lib/importlib/_bootstrap_external.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,20 @@ def _relax_case():
return False
return _relax_case

def _pack_uint64(x):
"""Convert a 64-bit integer to litte-endian."""
return (int(x) & 0xFFFFFFFFFFFFFFFF).to_bytes(8, 'little')

def _pack_uint32(x):
"""Convert a 32-bit integer to little-endian."""
return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little')


def _unpack_uint64(data):
"""Convert 8 bytes in little-endian to an integer."""
assert len(data) == 8
return int.from_bytes(data, 'little')

def _unpack_uint32(data):
"""Convert 4 bytes in little-endian to an integer."""
assert len(data) == 4
Expand Down Expand Up @@ -277,6 +285,7 @@ def _write_atomic(path, data, mode=0o666):
# Python 3.9a2 3423 (add IS_OP, CONTAINS_OP and JUMP_IF_NOT_EXC_MATCH bytecodes #39156)
# Python 3.9a2 3424 (simplify bytecodes for *value unpacking)
# Python 3.9a2 3425 (simplify bytecodes for **value unpacking)
# Python 3.9a5 3426 (use 64-bit integers for timestamp and size in pyc header)

#
# MAGIC must change whenever the bytecode emitted by the compiler may no
Expand All @@ -286,7 +295,7 @@ def _write_atomic(path, data, mode=0o666):
# Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array
# in PC/launcher.c must also be updated.

MAGIC_NUMBER = (3425).to_bytes(2, 'little') + b'\r\n'
MAGIC_NUMBER = (3426).to_bytes(2, 'little') + b'\r\n'
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c

_PYCACHE = '__pycache__'
Expand Down Expand Up @@ -501,7 +510,7 @@ def _classify_pyc(data, name, exc_details):
"""Perform basic validity checking of a pyc header and return the flags field,
which determines how the pyc should be further validated against the source.

*data* is the contents of the pyc file. (Only the first 16 bytes are
*data* is the contents of the pyc file. (Only the first 24 bytes are
required, though.)

*name* is the name of the module being imported. It is used for logging.
Expand All @@ -518,7 +527,7 @@ def _classify_pyc(data, name, exc_details):
message = f'bad magic number in {name!r}: {magic!r}'
_bootstrap._verbose_message('{}', message)
raise ImportError(message, **exc_details)
if len(data) < 16:
if len(data) < 24:
message = f'reached EOF while reading pyc header of {name!r}'
_bootstrap._verbose_message('{}', message)
raise EOFError(message)
Expand All @@ -534,7 +543,7 @@ def _validate_timestamp_pyc(data, source_mtime, source_size, name,
exc_details):
"""Validate a pyc against the source last-modified time.

*data* is the contents of the pyc file. (Only the first 16 bytes are
*data* is the contents of the pyc file. (Only the first 24 bytes are
required.)

*source_mtime* is the last modified timestamp of the source file.
Expand All @@ -549,12 +558,12 @@ def _validate_timestamp_pyc(data, source_mtime, source_size, name,
An ImportError is raised if the bytecode is stale.

"""
if _unpack_uint32(data[8:12]) != (source_mtime & 0xFFFFFFFF):
if _unpack_uint64(data[8:16]) != (source_mtime & 0xFFFFFFFFFFFFFFFF):
message = f'bytecode is stale for {name!r}'
_bootstrap._verbose_message('{}', message)
raise ImportError(message, **exc_details)
if (source_size is not None and
_unpack_uint32(data[12:16]) != (source_size & 0xFFFFFFFF)):
_unpack_uint64(data[16:24]) != (source_size & 0xFFFFFFFFFFFFFFFF)):
raise ImportError(f'bytecode is stale for {name!r}', **exc_details)


Expand Down Expand Up @@ -599,8 +608,8 @@ def _code_to_timestamp_pyc(code, mtime=0, source_size=0):
"Produce the data for a timestamp-based pyc."
data = bytearray(MAGIC_NUMBER)
data.extend(_pack_uint32(0))
data.extend(_pack_uint32(mtime))
data.extend(_pack_uint32(source_size))
data.extend(_pack_uint64(mtime))
data.extend(_pack_uint64(source_size))
data.extend(marshal.dumps(code))
return data

Expand All @@ -612,6 +621,8 @@ def _code_to_hash_pyc(code, source_hash, checked=True):
data.extend(_pack_uint32(flags))
assert len(source_hash) == 8
data.extend(source_hash)
# Padding for where source size goes in timestamped pyc header.
data.extend(_pack_uint64(0))
data.extend(marshal.dumps(code))
return data

Expand Down Expand Up @@ -888,7 +899,7 @@ def get_code(self, fullname):
}
try:
flags = _classify_pyc(data, fullname, exc_details)
bytes_data = memoryview(data)[16:]
bytes_data = memoryview(data)[24:]
hash_based = flags & 0b1 != 0
if hash_based:
check_source = flags & 0b10 != 0
Expand Down Expand Up @@ -1070,7 +1081,7 @@ def get_code(self, fullname):
}
_classify_pyc(data, fullname, exc_details)
return _compile_bytecode(
memoryview(data)[16:],
memoryview(data)[24:],
name=fullname,
bytecode_path=path,
)
Expand Down
2 changes: 1 addition & 1 deletion Lib/modulefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def load_module(self, fqname, fp, pathname, file_info):
except ImportError as exc:
self.msgout(2, "raise ImportError: " + str(exc), pathname)
raise
co = marshal.loads(memoryview(data)[16:])
co = marshal.loads(memoryview(data)[24:])
else:
co = None
m = self.add_module(fqname)
Expand Down
2 changes: 1 addition & 1 deletion Lib/pkgutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def read_code(stream):
if magic != importlib.util.MAGIC_NUMBER:
return None

stream.read(12) # Skip rest of the header
stream.read(20) # Skip rest of the header
return marshal.load(stream)


Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_compileall.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def add_bad_source_file(self):

def timestamp_metadata(self):
with open(self.bc_path, 'rb') as file:
data = file.read(12)
data = file.read(16)
mtime = int(os.stat(self.source_path).st_mtime)
compare = struct.pack('<4sll', importlib.util.MAGIC_NUMBER, 0, mtime)
compare = struct.pack('<4slq', importlib.util.MAGIC_NUMBER, 0, mtime)
return data, compare

def recreation_check(self, metadata):
Expand All @@ -76,7 +76,7 @@ def recreation_check(self, metadata):

def test_mtime(self):
# Test a change in mtime leads to a new .pyc.
self.recreation_check(struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
self.recreation_check(struct.pack('<4slq', importlib.util.MAGIC_NUMBER,
0, 1))

def test_magic_number(self):
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_import/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def test_module_without_source(self):
def test_foreign_code(self):
py_compile.compile(self.file_name)
with open(self.compiled_name, "rb") as f:
header = f.read(16)
header = f.read(24)
code = marshal.load(f)
constants = list(code.co_consts)
foreign_code = importlib.import_module.__code__
Expand Down
14 changes: 7 additions & 7 deletions Lib/test/test_importlib/source/test_file_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,14 +464,14 @@ def _test_partial_timestamp(self, test, *, del_source=False):
def _test_partial_size(self, test, *, del_source=False):
with util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:15],
lambda bc: bc[:17],
del_source=del_source)
test('_temp', mapping, bc_path)

def _test_no_marshal(self, *, del_source=False):
with util.create_modules('_temp') as mapping:
bc_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:16],
lambda bc: bc[:24],
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bc_path
with self.assertRaises(EOFError):
Expand All @@ -480,7 +480,7 @@ def _test_no_marshal(self, *, del_source=False):
def _test_non_code_marshal(self, *, del_source=False):
with util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:16] + marshal.dumps(b'abcd'),
lambda bc: bc[:24] + marshal.dumps(b'abcd'),
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(ImportError) as cm:
Expand All @@ -491,7 +491,7 @@ def _test_non_code_marshal(self, *, del_source=False):
def _test_bad_marshal(self, *, del_source=False):
with util.create_modules('_temp') as mapping:
bytecode_path = self.manipulate_bytecode('_temp', mapping,
lambda bc: bc[:16] + b'<test>',
lambda bc: bc[:24] + b'<test>',
del_source=del_source)
file_path = mapping['_temp'] if not del_source else bytecode_path
with self.assertRaises(EOFError):
Expand Down Expand Up @@ -635,7 +635,7 @@ def test_bad_marshal(self):
def test_old_timestamp(self):
# When the timestamp is older than the source, bytecode should be
# regenerated.
zeros = b'\x00\x00\x00\x00'
zeros = b'\x00\x00\x00\x00\x00\x00\x00\x00'
with util.create_modules('_temp') as mapping:
py_compile.compile(mapping['_temp'])
bytecode_path = self.util.cache_from_source(mapping['_temp'])
Expand All @@ -644,10 +644,10 @@ def test_old_timestamp(self):
bytecode_file.write(zeros)
self.import_(mapping['_temp'], '_temp')
source_mtime = os.path.getmtime(mapping['_temp'])
source_timestamp = self.importlib._pack_uint32(source_mtime)
source_timestamp = self.importlib._pack_uint64(source_mtime)
with open(bytecode_path, 'rb') as bytecode_file:
bytecode_file.seek(8)
self.assertEqual(bytecode_file.read(4), source_timestamp)
self.assertEqual(bytecode_file.read(8), source_timestamp)

# [bytecode read-only]
@util.writes_bytecode_files
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_importlib/test_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,8 @@ def verify_code(self, code_object, *, bytecode_written=False):
self.assertIn(self.cached, self.loader.written)
data = bytearray(self.util.MAGIC_NUMBER)
data.extend(self.init._pack_uint32(0))
data.extend(self.init._pack_uint32(self.loader.source_mtime))
data.extend(self.init._pack_uint32(self.loader.source_size))
data.extend(self.init._pack_uint64(self.loader.source_mtime))
data.extend(self.init._pack_uint64(self.loader.source_size))
data.extend(marshal.dumps(code_object))
self.assertEqual(self.loader.written[self.cached], bytes(data))

Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_importlib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def get_code_from_pyc(pyc_path):
No header validation is performed.
"""
with open(pyc_path, 'rb') as pyc_f:
pyc_f.seek(16)
pyc_f.seek(24)
return marshal.load(pyc_f)


Expand Down
8 changes: 8 additions & 0 deletions Lib/test/test_py_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ def test_source_date_epoch(self):

self.assertEqual(flags, expected_flags)

def test_mtime_year_2038(self):
    # Test that we can compile a file whose modification time does not
    # fit in a 32-bit number (i.e. lies beyond the year 2038).
    try:
        # os.utime() takes the path directly; there is no need to open
        # the file just to read back its name.
        os.utime(self.source_path, (2**33, 2**33))
    except (OverflowError, OSError):
        # The platform or filesystem cannot store such a timestamp, so
        # there is nothing to test here.
        self.skipTest("filesystem doesn't support timestamps after 2038")
    py_compile.compile(self.source_path, self.pyc_path)
    self.assertTrue(os.path.exists(self.pyc_path))

@unittest.skipIf(sys.flags.optimize > 0, 'test does not work with -O')
def test_double_dot_no_clobber(self):
# http://bugs.python.org/issue22966
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_zipimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def make_pyc(co, mtime, size):
else:
mtime = int(-0x100000000 + int(mtime))
pyc = (importlib.util.MAGIC_NUMBER +
struct.pack("<iii", 0, int(mtime), size & 0xFFFFFFFF) + data)
struct.pack("<iqq", 0, int(mtime), size & 0xFFFFFFFFFFFFFFFF) + data)
return pyc

def module_path_to_dotted_name(path):
Expand Down
8 changes: 4 additions & 4 deletions Lib/zipimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#from importlib import _bootstrap_external
#from importlib import _bootstrap # for _verbose_message
import _frozen_importlib_external as _bootstrap_external
from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
from _frozen_importlib_external import _unpack_uint16, _unpack_uint32, _unpack_uint64
import _frozen_importlib as _bootstrap # for _verbose_message
import _imp # for check_hash_based_pycs
import _io # for open
Expand Down Expand Up @@ -619,13 +619,13 @@ def _unmarshal_code(self, pathname, fullpath, fullname, data):
if source_mtime:
# We don't use _bootstrap_external._validate_timestamp_pyc
# to allow for a more lenient timestamp check.
if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
_unpack_uint32(data[12:16]) != source_size):
if (not _eq_mtime(_unpack_uint64(data[8:16]), source_mtime) or
_unpack_uint64(data[16:24]) != source_size):
_bootstrap._verbose_message(
f'bytecode is stale for {fullname!r}')
return None

code = marshal.loads(data[16:])
code = marshal.loads(data[24:])
if not isinstance(code, _code_type):
raise TypeError(f'compiled module {pathname!r} is not a code object')
return code
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Timestamp-based .pyc files now use 64-bit integers for the source modification
time and file size fields. This fixes the Year 2038 problem whereby Python files
with an mtime after 2038 would not work with the compileall module.
Loading