Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 10 additions & 33 deletions warehouse/forklift/legacy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# SPDX-License-Identifier: Apache-2.0
import csv
import hashlib
import hmac
import os.path
Expand Down Expand Up @@ -65,6 +64,11 @@
from warehouse.rate_limiting.interfaces import RateLimiterException
from warehouse.utils import readme, zipfiles
from warehouse.utils.release import strip_keywords
from warehouse.utils.wheel import (
InvalidWheelRecordError,
MissingWheelRecordError,
validate_record,
)

PATH_HASHER = "blake2_256"

Expand Down Expand Up @@ -455,11 +459,6 @@ def _sort_releases(request: Request, project: Project):
r._pypi_ordering = i


def _zip_filename_is_dir(filename: str) -> bool:
"""Return True if this ZIP archive member is a directory."""
return filename.endswith(("/", "\\"))


@view_config(
route_name="forklift.legacy.file_upload",
uses_session=True,
Expand Down Expand Up @@ -1417,30 +1416,9 @@ def file_upload(request):
f"distribution file {filename} at {license_filename}",
)

"""
Extract RECORD file from a wheel and check the ZIP archive contents
against the files listed in the RECORD. Mismatches are reported via email.
"""
record_filename = f"{name}-{version}.dist-info/RECORD"
# Files that must be missing from 'RECORD',
# so we ignore them when cross-checking.
record_exemptions = {
f"{name}-{version}.dist-info/RECORD.jws",
f"{name}-{version}.dist-info/RECORD.p7s",
}
try:
with zipfile.ZipFile(temporary_filename) as zfp:
wheel_record_contents = zfp.read(record_filename).decode()
record_entries = {
fn.replace("\\", "/") # Normalize Windows path separators.
for fn, *_ in csv.reader(wheel_record_contents.splitlines())
}
zip_entries = {
fn
for fn in zfp.namelist()
if not _zip_filename_is_dir(fn) and fn not in record_exemptions
}
except (UnicodeError, KeyError, csv.Error) as e:
validate_record(temporary_filename)
except MissingWheelRecordError:
request.metrics.increment(
"warehouse.upload.failed",
tags=[
Expand All @@ -1451,13 +1429,12 @@ def file_upload(request):
raise _exc_with_message(
HTTPBadRequest,
"Wheel '{filename}' does not contain the required "
"RECORD file: {record_filename} {e}".format(
"RECORD file: {record_filename}".format(
filename=filename,
record_filename=record_filename,
e=str(type(e)) + repr(e),
record_filename=f"{name}-{version}.dist-info/RECORD",
),
)
if record_entries != zip_entries:
except InvalidWheelRecordError:
send_wheel_record_mismatch_email(
request,
set(project.users),
Expand Down
76 changes: 75 additions & 1 deletion warehouse/utils/wheel.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
# SPDX-License-Identifier: Apache-2.0

import csv
import os
import re
import sys
import zipfile

import packaging.tags
import packaging.utils

# import sentry_sdk

class MissingWheelRecordError(Exception):
    """Raised when a wheel's RECORD file cannot be read from the archive.

    This covers a RECORD member that is absent, cannot be decoded as text,
    or cannot be parsed as CSV.
    """


class InvalidWheelRecordError(Exception):
    """Raised when a wheel's RECORD entries differ from the archive contents.

    The exception message describes which files are missing from RECORD
    and/or missing from the wheel.
    """


_PLATFORMS = [
(re.compile(r"^win_(.*?)$"), lambda m: f"Windows {_normalize_arch(m.group(1))}"),
Expand Down Expand Up @@ -153,3 +164,66 @@ def tags_to_filters(tags: set[packaging.tags.Tag]) -> dict[str, list[str]]:
"abis": sorted(abis),
"platforms": sorted(platforms),
}


def _zip_filename_is_dir(filename: str) -> bool:
"""Return True if this ZIP archive member is a directory."""
return filename.endswith(("/", "\\"))


def validate_record(wheel_filepath: str) -> bool:
    """
    Check that a wheel's RECORD lists exactly the files stored in the archive.

    The distribution name and version are taken from the first two
    dash-separated components of the wheel's base file name and are used to
    locate ``{name}-{version}.dist-info/RECORD`` inside the ZIP archive.
    Directory members and the ``RECORD.jws`` / ``RECORD.p7s`` files are
    ignored when cross-checking, and backslashes in RECORD paths are
    normalized to forward slashes.

    Returns True when the RECORD entries and the archive members match.

    Raises MissingWheelRecordError when the RECORD member is absent, cannot
    be decoded, or cannot be parsed as CSV.
    Raises InvalidWheelRecordError when the two sets of file names differ;
    the message lists what is missing on each side.
    Raises ValueError when the base file name has fewer than three
    dash-separated components.
    """
    filename = os.path.basename(wheel_filepath)
    name, version, _ = filename.split("-", 2)
    dist_info_dir = f"{name}-{version}.dist-info"
    record_filename = f"{dist_info_dir}/RECORD"
    # Files that must be missing from 'RECORD',
    # so we ignore them when cross-checking.
    record_exemptions = {
        f"{dist_info_dir}/RECORD.jws",
        f"{dist_info_dir}/RECORD.p7s",
    }
    try:
        with zipfile.ZipFile(wheel_filepath) as zfp:
            wheel_record_contents = zfp.read(record_filename).decode()
            record_entries = {
                row[0].replace("\\", "/")  # Normalize Windows path separators.
                for row in csv.reader(wheel_record_contents.splitlines())
                # csv.reader yields an empty list for a blank line; skip it
                # instead of failing to unpack the row.
                if row
            }
            wheel_entries = {
                fn
                for fn in zfp.namelist()
                if not _zip_filename_is_dir(fn) and fn not in record_exemptions
            }
    except (UnicodeError, KeyError, csv.Error) as error:
        # Keep the underlying cause attached for debugging.
        raise MissingWheelRecordError(
            f"Could not read {record_filename}"
        ) from error
    if record_entries != wheel_entries:
        # Sorted lists keep the message deterministic across runs.
        record_is_missing = sorted(wheel_entries - record_entries)
        wheel_is_missing = sorted(record_entries - wheel_entries)
        problems = []
        if record_is_missing:
            problems.append(f"Record is missing {record_is_missing}")
        if wheel_is_missing:
            problems.append(f"Wheel is missing {wheel_is_missing}")
        raise InvalidWheelRecordError("; ".join(problems))
    return True


def main(argv) -> int:  # pragma: no cover
    """Command-line entry point: validate the RECORD of a single wheel.

    ``argv`` is the list of command-line arguments without the program name
    and must contain exactly one item, the path to the wheel.

    Prints ``<wheel file name>: OK`` and returns 0 when validation succeeds.
    Prints a usage line, or the wheel file name followed by the repr of the
    exception, and returns 1 otherwise.
    """
    if len(argv) == 1:
        wheel_filepath = argv[0]
        wheel_filename = os.path.basename(wheel_filepath)
        exit_code = 1
        try:
            validate_record(wheel_filepath)
            print(f"{wheel_filename}: OK")
            exit_code = 0
        except Exception as error:
            # Top-level CLI boundary: report any failure instead of a traceback.
            print(f"{wheel_filename}: {error!r}")
        return exit_code

    print("Usage: python -m warehouse.utils.wheel <wheel path>")
    return 1


# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":  # pragma: no cover
    sys.exit(main(sys.argv[1:]))