Skip to content

Commit 749e8a6

Browse files
committed
Add builtin RFC3339 validator
1 parent 6c7c951 commit 749e8a6

File tree

5 files changed

+226
-0
lines changed

5 files changed

+226
-0
lines changed

CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Unreleased
1111
.. vendor-insert-here
1212
1313
- Update vendored schemas (2024-02-05)
14+
- Include a built-in, efficient implementation of `date-time` format validation
15+
(RFC 3339). This makes the `date-time` format always available for
16+
validation. (:issue:`378`)
1417
- Support the use of `orjson` for faster JSON parsing when it is installed.
1518
This makes it an optional parser which is preferred over the default
1619
`json` module when it is available.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import jsonschema.validators
1010
import regress
1111

12+
from .implementations import validate_rfc3339
13+
1214
# all known format strings except for a selection from draft3 which have either
1315
# been renamed or removed:
1416
# - color
@@ -101,6 +103,7 @@ def make_format_checker(
101103
del checker.checkers["regex"]
102104
regex_impl = RegexImplementation(opts.regex_variant)
103105
checker.checks("regex")(regex_impl.check_format)
106+
checker.checks("date-time")(validate_rfc3339)
104107

105108
# remove the disabled checks, which may include the regex check
106109
for checkname in opts.disabled_formats:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .rfc3339 import validate as validate_rfc3339
2+
3+
__all__ = ("validate_rfc3339",)
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
def _is_ascii_digits(text: str) -> bool:
    """Return True when *text* is non-empty and contains only ASCII ``0``-``9``.

    ``str.isnumeric()`` is too permissive for this purpose: it is also true for
    characters such as superscripts, vulgar fractions, and non-Latin digits,
    none of which are valid in an RFC 3339 date-time.
    """
    return text.isascii() and text.isdigit()


def validate(date_str: str) -> bool:
    """Validate a string as a RFC3339 date-time.

    The check is ordered so that the cheapest tests run first: fixed separator
    positions, then the shape of the fractional-seconds and offset segments,
    and only then the numeric range of each field.

    :param date_str: the candidate string, e.g. ``"2018-12-31T23:59:59Z"``
    :returns: ``True`` if the string has the form
        ``YYYY-MM-DD(T|t)HH:MM:SS[(.|,)digits](Z|z|(+|-)HH:MM)`` with every
        field in range (including month lengths and leap years), ``False``
        otherwise. Strings that are too short are reported as ``False`` rather
        than raising.

    NOTE(review): non-``str`` inputs are not guarded against here; if this is
    registered as a format check that may receive other JSON types, confirm
    the caller filters them.
    """
    try:
        # the following chars MUST be fixed values:
        #   YYYY-MM-DDTHH:MM:SSZ
        #       ^  ^  ^  ^  ^
        #
        # so start by checking them first
        # this keeps us as fast as possible in failure cases
        #
        # (note: "T" and "t" are both allowed under ISO8601)
        if (
            date_str[4] != "-"
            or date_str[7] != "-"
            or date_str[10] not in ("T", "t")
            or date_str[13] != ":"
            or date_str[16] != ":"
        ):
            return False

        # check for fractional seconds, which pushes the location of the
        # offset/Z; record the discovered start position of the offset segment
        #
        # NOTE(review): the RFC 3339 grammar lists only "." as the fraction
        # separator; "," is kept here because the existing behavior accepts it
        offset_start = 19
        if date_str[19] in (".", ","):
            # look for the offset marker in a fixed priority order
            for marker in ("Z", "z", "+", "-"):
                offset_start = date_str.find(marker, 20)
                if offset_start != -1:
                    break
            else:
                # no offset after `.` or `,` as a separator, so it's wrong
                return False

            # fractional seconds must be one or more ASCII digits; any number
            # of digits is accepted, so there is no length check
            if not _is_ascii_digits(date_str[20:offset_start]):
                return False

        # now, handle Z vs offset
        # (note: "Z" and "z" are both allowed under ISO8601)
        z_offset = date_str[offset_start:] in ("Z", "z")
        if not z_offset:
            # a numeric offset is exactly six chars: sign, HH, ":", MM
            if len(date_str) != offset_start + 6:
                return False
            if date_str[offset_start] not in ("+", "-"):
                return False
            if date_str[offset_start + 3] != ":":
                return False

        year = date_str[:4]
        if not _is_ascii_digits(year):
            return False
        year_val = int(year)

        month = date_str[5:7]
        if not _is_ascii_digits(month):
            return False
        month_val = int(month)
        if not 1 <= month_val <= 12:
            return False

        day = date_str[8:10]
        if not _is_ascii_digits(day):
            return False
        max_day = 31
        if month_val in (4, 6, 9, 11):
            max_day = 30
        elif month_val == 2:
            # Gregorian leap year rule
            is_leap = year_val % 4 == 0 and (
                year_val % 100 != 0 or year_val % 400 == 0
            )
            max_day = 29 if is_leap else 28
        if not 1 <= int(day) <= max_day:
            return False

        hour = date_str[11:13]
        if not _is_ascii_digits(hour):
            return False
        if not 0 <= int(hour) <= 23:
            return False

        minute = date_str[14:16]
        if not _is_ascii_digits(minute):
            return False
        if not 0 <= int(minute) <= 59:
            return False

        # NOTE(review): a seconds value of 60 (leap second) is rejected here
        second = date_str[17:19]
        if not _is_ascii_digits(second):
            return False
        if not 0 <= int(second) <= 59:
            return False

        if not z_offset:
            offset_hour = date_str[offset_start + 1 : offset_start + 3]
            if not _is_ascii_digits(offset_hour):
                return False
            if not 0 <= int(offset_hour) <= 23:
                return False
            offset_minute = date_str[offset_start + 4 : offset_start + 6]
            if not _is_ascii_digits(offset_minute):
                return False
            if not 0 <= int(offset_minute) <= 59:
                return False
    except (IndexError, ValueError):
        # too-short strings surface as IndexError from the positional lookups
        return False
    return True

tests/unit/formats/test_rfc3339.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import random
2+
3+
import pytest
4+
5+
from check_jsonschema.formats.implementations.rfc3339 import validate
6+
7+
8+
@pytest.mark.parametrize(
    "datestr",
    [
        "2018-12-31T23:59:59Z",
        "2018-12-31t23:59:59Z",
        "2018-12-31t23:59:59z",
        "2018-12-31T23:59:59+00:00",
        "2018-12-31T23:59:59-00:00",
    ],
)
def test_simple_positive_cases(datestr):
    """Well-formed date-times (either case of T/Z, zero offsets) are accepted."""
    accepted = validate(datestr)
    assert accepted
20+
21+
22+
@pytest.mark.parametrize(
    "datestr",
    [
        "2018-12-31T23:59:59",
        "2018-12-31T23:59:59+00:00Z",
        "2018-12-31 23:59:59",
    ],
)
def test_simple_negative_case(datestr):
    """Missing offset, doubled offset, and a space separator are all rejected."""
    accepted = validate(datestr)
    assert not accepted
32+
33+
34+
@pytest.mark.parametrize("precision", list(range(20)))
@pytest.mark.parametrize(
    "offsetstr",
    [
        "Z",
        "+00:00",
        "-00:00",
        "+23:59",
    ],
)
def test_allows_fracsec(precision, offsetstr):
    """A random fractional-seconds value is accepted with every offset form."""
    # upper bound grows with the parametrized precision
    upper_bound = 10**precision
    fracsec = random.randint(0, upper_bound)
    candidate = f"2018-12-31T23:59:59.{fracsec}{offsetstr}"
    assert validate(candidate)
47+
48+
49+
@pytest.mark.parametrize(
    "datestr",
    [
        # no such month
        "2020-13-01T00:00:00Z",
        "2020-00-01T00:00:00Z",
        # no such day
        "2020-01-00T00:00:00Z",
        "2020-01-32T00:00:00Z",
    ],
)
def test_basic_bounds_validated(datestr):
    """Out-of-range month and day values are rejected."""
    accepted = validate(datestr)
    assert not accepted
62+
63+
64+
@pytest.mark.parametrize(
    "month, maxday",
    [
        (1, 31),
        (3, 31),
        (4, 30),
        (5, 31),
        (6, 30),
        (7, 31),
        (8, 31),
        (9, 30),
        (10, 31),
        (11, 30),
    ],
)
def test_day_bounds_by_month(month, maxday):
    """The last day of each non-February month passes; the day after fails."""
    last_valid = f"2020-{month:02}-{maxday:02}T00:00:00Z"
    first_invalid = f"2020-{month:02}-{maxday+1:02}T00:00:00Z"
    assert validate(last_valid)
    assert not validate(first_invalid)
84+
85+
86+
@pytest.mark.parametrize(
    "year, maxday",
    [
        (2018, 28),
        (2016, 29),
        (2400, 29),
        (2500, 28),
    ],
)
def test_day_bounds_for_february(year, maxday):
    """February's last day follows the leap-year rule for each sampled year."""
    last_valid = f"{year}-02-{maxday:02}T00:00:00Z"
    first_invalid = f"{year}-02-{maxday+1:02}T00:00:00Z"
    assert validate(last_valid)
    assert not validate(first_invalid)

0 commit comments

Comments
 (0)