Skip to content

Commit d42e582

Browse files
authored
bpo-40260: Update modulefinder to use io.open_code() and respect coding comments (GH-19488)
1 parent aade1cc commit d42e582

3 files changed

Lines changed: 75 additions & 21 deletions

File tree

Lib/modulefinder.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import importlib.machinery
66
import marshal
77
import os
8+
import io
89
import sys
910
import types
1011
import warnings
@@ -68,35 +69,32 @@ def _find_module(name, path=None):
6869
# Some special cases:
6970

7071
if spec.loader is importlib.machinery.BuiltinImporter:
71-
return None, None, ("", "", _C_BUILTIN)
72+
return None, None, ("", _C_BUILTIN)
7273

7374
if spec.loader is importlib.machinery.FrozenImporter:
74-
return None, None, ("", "", _PY_FROZEN)
75+
return None, None, ("", _PY_FROZEN)
7576

7677
file_path = spec.origin
7778

7879
if spec.loader.is_package(name):
79-
return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)
80+
return None, os.path.dirname(file_path), ("", _PKG_DIRECTORY)
8081

8182
if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
8283
kind = _PY_SOURCE
83-
mode = "r"
8484

8585
elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
8686
kind = _C_EXTENSION
87-
mode = "rb"
8887

8988
elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
9089
kind = _PY_COMPILED
91-
mode = "rb"
9290

9391
else: # Should never happen.
94-
return None, None, ("", "", _SEARCH_ERROR)
92+
return None, None, ("", _SEARCH_ERROR)
9593

96-
file = open(file_path, mode)
94+
file = io.open_code(file_path)
9795
suffix = os.path.splitext(file_path)[-1]
9896

99-
return file, file_path, (suffix, mode, kind)
97+
return file, file_path, (suffix, kind)
10098

10199

102100
class Module:
@@ -160,15 +158,15 @@ def msgout(self, *args):
160158

161159
def run_script(self, pathname):
162160
self.msg(2, "run_script", pathname)
163-
with open(pathname) as fp:
164-
stuff = ("", "r", _PY_SOURCE)
161+
with io.open_code(pathname) as fp:
162+
stuff = ("", _PY_SOURCE)
165163
self.load_module('__main__', fp, pathname, stuff)
166164

167165
def load_file(self, pathname):
168166
dir, name = os.path.split(pathname)
169167
name, ext = os.path.splitext(name)
170-
with open(pathname) as fp:
171-
stuff = (ext, "r", _PY_SOURCE)
168+
with io.open_code(pathname) as fp:
169+
stuff = (ext, _PY_SOURCE)
172170
self.load_module(name, fp, pathname, stuff)
173171

174172
def import_hook(self, name, caller=None, fromlist=None, level=-1):
@@ -333,14 +331,14 @@ def import_module(self, partname, fqname, parent):
333331
return m
334332

335333
def load_module(self, fqname, fp, pathname, file_info):
336-
suffix, mode, type = file_info
334+
suffix, type = file_info
337335
self.msgin(2, "load_module", fqname, fp and "fp", pathname)
338336
if type == _PKG_DIRECTORY:
339337
m = self.load_package(fqname, pathname)
340338
self.msgout(2, "load_module ->", m)
341339
return m
342340
if type == _PY_SOURCE:
343-
co = compile(fp.read()+'\n', pathname, 'exec')
341+
co = compile(fp.read()+b'\n', pathname, 'exec')
344342
elif type == _PY_COMPILED:
345343
try:
346344
data = fp.read()
@@ -504,7 +502,7 @@ def find_module(self, name, path, parent=None):
504502

505503
if path is None:
506504
if name in sys.builtin_module_names:
507-
return (None, None, ("", "", _C_BUILTIN))
505+
return (None, None, ("", _C_BUILTIN))
508506

509507
path = self.path
510508

Lib/test/test_modulefinder.py

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
from c import something
4141
b/__init__.py
4242
from sys import *
43-
"""]
43+
""",
44+
]
4445

4546
maybe_test_new = [
4647
"a.module",
@@ -245,6 +246,48 @@ def foo(): pass
245246
b/c.py
246247
"""]
247248

249+
coding_default_utf8_test = [
250+
"a_utf8",
251+
["a_utf8", "b_utf8"],
252+
[], [],
253+
"""\
254+
a_utf8.py
255+
# use the default of utf8
256+
print('Unicode test A code point 2090 \u2090 that is not valid in cp1252')
257+
import b_utf8
258+
b_utf8.py
259+
# use the default of utf8
260+
print('Unicode test B code point 2090 \u2090 that is not valid in cp1252')
261+
"""]
262+
263+
coding_explicit_utf8_test = [
264+
"a_utf8",
265+
["a_utf8", "b_utf8"],
266+
[], [],
267+
"""\
268+
a_utf8.py
269+
# coding=utf8
270+
print('Unicode test A code point 2090 \u2090 that is not valid in cp1252')
271+
import b_utf8
272+
b_utf8.py
273+
# use the default of utf8
274+
print('Unicode test B code point 2090 \u2090 that is not valid in cp1252')
275+
"""]
276+
277+
coding_explicit_cp1252_test = [
278+
"a_cp1252",
279+
["a_cp1252", "b_utf8"],
280+
[], [],
281+
b"""\
282+
a_cp1252.py
283+
# coding=cp1252
284+
# 0xe2 is not allowed in utf8
285+
print('CP1252 test P\xe2t\xe9')
286+
import b_utf8
287+
b_utf8.py
288+
# use the default of utf8
289+
print('Unicode test A code point 2090 \u2090 that is not valid in cp1252')
290+
"""]
248291

249292
def open_file(path):
250293
dirname = os.path.dirname(path)
@@ -253,18 +296,22 @@ def open_file(path):
253296
except OSError as e:
254297
if e.errno != errno.EEXIST:
255298
raise
256-
return open(path, "w")
299+
return open(path, 'wb')
257300

258301

259302
def create_package(source):
260303
ofi = None
261304
try:
262305
for line in source.splitlines():
263-
if line.startswith(" ") or line.startswith("\t"):
264-
ofi.write(line.strip() + "\n")
306+
if type(line) != bytes:
307+
line = line.encode('utf-8')
308+
if line.startswith(b' ') or line.startswith(b'\t'):
309+
ofi.write(line.strip() + b'\n')
265310
else:
266311
if ofi:
267312
ofi.close()
313+
if type(line) == bytes:
314+
line = line.decode('utf-8')
268315
ofi = open_file(os.path.join(TEST_DIR, line.strip()))
269316
finally:
270317
if ofi:
@@ -337,7 +384,7 @@ def test_bytecode(self):
337384
source_path = base_path + importlib.machinery.SOURCE_SUFFIXES[0]
338385
bytecode_path = base_path + importlib.machinery.BYTECODE_SUFFIXES[0]
339386
with open_file(source_path) as file:
340-
file.write('testing_modulefinder = True\n')
387+
file.write('testing_modulefinder = True\n'.encode('utf-8'))
341388
py_compile.compile(source_path, cfile=bytecode_path)
342389
os.remove(source_path)
343390
self._do_test(bytecode_test)
@@ -365,6 +412,14 @@ def test_extended_opargs(self):
365412
""" % list(range(2**16))] # 2**16 constants
366413
self._do_test(extended_opargs_test)
367414

415+
def test_coding_default_utf8(self):
416+
self._do_test(coding_default_utf8_test)
417+
418+
def test_coding_explicit_utf8(self):
419+
self._do_test(coding_explicit_utf8_test)
420+
421+
def test_coding_explicit_cp1252(self):
422+
self._do_test(coding_explicit_cp1252_test)
368423

369424
if __name__ == "__main__":
370425
unittest.main()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Ensure :mod:`modulefinder` uses :func:`io.open_code` and respects coding comments.

0 commit comments

Comments
 (0)