From e5b2b138b41c3f5307b625f17eeea7b8248dcf23 Mon Sep 17 00:00:00 2001 From: Damien George Date: Tue, 2 Dec 2025 13:07:34 +1100 Subject: [PATCH 1/2] tarfile: Add basic unittest for tarfile.TarFile. Tests TarFile iteration and extracting file information. Signed-off-by: Damien George --- python-stdlib/tarfile/test.tar | Bin 0 -> 20480 bytes python-stdlib/tarfile/test_tarfile.py | 37 ++++++++++++++++++++++++++ tools/ci.sh | 1 + 3 files changed, 38 insertions(+) create mode 100644 python-stdlib/tarfile/test.tar create mode 100644 python-stdlib/tarfile/test_tarfile.py diff --git a/python-stdlib/tarfile/test.tar b/python-stdlib/tarfile/test.tar new file mode 100644 index 0000000000000000000000000000000000000000..7fa0604d7f389d2647224b46dc320b920ca2b400 GIT binary patch literal 20480 zcmeI%O=`n15QgDd_Y^)st6%#(PVAI!n&LL}`W-oK=%ykDOW5=^1TO^A@C=`1lwQg| zMxQnW(UK}GmIAyOQa9CxQ{3I#i_vhcw`EL#R&#SI8dLaMnK<;#w;bnR%U3$uYrw6*e{y6{5 z?fh@Uod3?W(!|n*rTPE)`JeiKEbN^6fA9X^`cC(MYl8Nl@4v#0(dm!u|K*W4yh6(V zP`JKD`JeZH&3_>Oh1)VuKal?#%$r?9FFzjjSG2x-{$0@VpYJ~(&3yg`=gEK3rhHF6 zR#+t8|1^5L|MMYU@Bdo9|0;AwWMu!Z;=I*GP8AAUmVf=9K=1!M$NpcTHF6^VHJDig z0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY** S5I_I{1Q0*~0R#|OEbs-AIgsK2 literal 0 HcmV?d00001 diff --git a/python-stdlib/tarfile/test_tarfile.py b/python-stdlib/tarfile/test_tarfile.py new file mode 100644 index 000000000..d3eb34318 --- /dev/null +++ b/python-stdlib/tarfile/test_tarfile.py @@ -0,0 +1,37 @@ +import tarfile +import unittest + + +test_tar_contents = ( + ("a", "file", 2), + ("b", "file", 2), + ("dir/", "dir", 0), + ("dir/c", "file", 2), + ("dir/d", "file", 2), + ("tar.tar", "file", 10240), +) + +test_sub_tar_contents = ( + ("e", "file", 2), + ("f", "file", 2), +) + + +class TestTarFile(unittest.TestCase): + def check_contents(self, expected, tf): + for i, file in enumerate(tf): + name, type, size = expected[i] + self.assertEqual(file.name, name) + self.assertEqual(file.type, type) + self.assertEqual(file.size, size) + + 
def test_iter(self): + tf = tarfile.TarFile("test.tar") + for _ in range(6): + self.assertIsInstance(next(tf), tarfile.TarInfo) + with self.assertRaises(StopIteration): + next(tf) + + def test_contents(self): + tf = tarfile.TarFile("test.tar") + self.check_contents(test_tar_contents, tf) diff --git a/tools/ci.sh b/tools/ci.sh index 6689e8aa4..abe83b563 100755 --- a/tools/ci.sh +++ b/tools/ci.sh @@ -90,6 +90,7 @@ function ci_package_tests_run { python-stdlib/pathlib \ python-stdlib/quopri \ python-stdlib/shutil \ + python-stdlib/tarfile \ python-stdlib/tempfile \ python-stdlib/time \ python-stdlib/unittest/tests \ From 2b91485c3cc728ba6ec9483c1a5fcea24c64d056 Mon Sep 17 00:00:00 2001 From: Damien George Date: Tue, 2 Dec 2025 13:08:13 +1100 Subject: [PATCH 2/2] tarfile: Fix FileSection.skip to not rely on extended readinto args. Commit 2ca1527321d7e6b65bcab45d304be82d65f3c4f4 optimized `FileSection.skip()` for memory use. But that introduced a dependency on the MicroPython-extension to stream read methods for an additional argument specifying a maximum read size. This optimization meant that all file-like objects passed into TarFile must support the extended 2-argument `readinto` form. This is problematic for at least two use cases: 1. Nested tar files, because `FileSection` itself doesn't support 2-argument `readinto`. 2. Using `mpremote mount` and reading a tar file from the remote mount, which also doesn't support 2-argument `readinto`. Instead of requiring all file-like objects to implement this extended form of `readinto`, this commit changes `FileSection.skip()` so that it doesn't use this form. A test is added for this case which fails without the fix here. 
Signed-off-by: Damien George --- python-stdlib/tarfile/manifest.py | 2 +- python-stdlib/tarfile/tarfile/__init__.py | 9 ++++++--- python-stdlib/tarfile/test_tarfile.py | 8 ++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/python-stdlib/tarfile/manifest.py b/python-stdlib/tarfile/manifest.py index 9940bb051..c379837c7 100644 --- a/python-stdlib/tarfile/manifest.py +++ b/python-stdlib/tarfile/manifest.py @@ -1,4 +1,4 @@ -metadata(description="Read-only implementation of Python's tarfile.", version="0.4.1") +metadata(description="Read-only implementation of Python's tarfile.", version="0.4.2") # Originally written by Paul Sokolovsky. diff --git a/python-stdlib/tarfile/tarfile/__init__.py b/python-stdlib/tarfile/tarfile/__init__.py index 4bb95af30..00b271137 100644 --- a/python-stdlib/tarfile/tarfile/__init__.py +++ b/python-stdlib/tarfile/tarfile/__init__.py @@ -55,9 +55,12 @@ def skip(self): if sz: buf = bytearray(16) while sz: - s = min(sz, 16) - self.f.readinto(buf, s) - sz -= s + if sz >= 16: + self.f.readinto(buf) + sz -= 16 + else: + self.f.read(sz) + sz = 0 class TarInfo: diff --git a/python-stdlib/tarfile/test_tarfile.py b/python-stdlib/tarfile/test_tarfile.py index d3eb34318..7acd61af7 100644 --- a/python-stdlib/tarfile/test_tarfile.py +++ b/python-stdlib/tarfile/test_tarfile.py @@ -35,3 +35,11 @@ def test_iter(self): def test_contents(self): tf = tarfile.TarFile("test.tar") self.check_contents(test_tar_contents, tf) + + def test_nested_tar(self): + tf = tarfile.TarFile("test.tar") + for file in tf: + if file.name == "tar.tar": + subf = tf.extractfile(file) + subtf = tarfile.TarFile(fileobj=subf) + self.check_contents(test_sub_tar_contents, subtf)