# Source code for whenever._parse

import sys
from datetime import (
    date as _date,
    datetime as _datetime,
    time as _time,
    timedelta as _timedelta,
    timezone as _timezone,
)
from typing import Literal, NoReturn, cast

from ._common import (
    DUMMY_LEAP_YEAR,
    UTC,
    Nanos,
    check_utc_bounds,
    mk_fixed_tzinfo,
)
from ._tz import SafeTzId, TimeZone, get_tz, resolve_ambiguity, validate_tzid


class InvalidOffsetError(ValueError):
    """A string has an invalid offset for the given zone"""
def _parse_err(s: str) -> NoReturn:
    """Raise the module's standard ``ValueError`` for an unparseable string.

    The exception is raised ``from None`` so that the low-level error which
    triggered it is not shown as its context.
    """
    raise ValueError(f"Invalid format: {s!r}") from None


def _parse_nanos(s: str) -> Nanos:
    """Convert the digits after a decimal separator into nanoseconds.

    ``s`` must consist of 1 to 9 ASCII digits; it is right-padded with zeros
    to nine digits, so ``"5"`` becomes ``500_000_000``.

    Raises:
        ValueError: if ``s`` is empty, longer than 9 characters, or contains
            anything other than ASCII digits.
    """
    if len(s) > 9 or not s.isdigit() or not s.isascii():
        raise ValueError("Invalid decimals")
    return int(s.ljust(9, "0"))


def _int_strict(s: str) -> int:
    """Convert a string of ASCII digits to an ``int``, rejecting anything else.

    A bare ``int()`` call also accepts a leading sign, surrounding whitespace,
    underscores between digits and non-ASCII digits. None of those are valid
    inside the fixed-width numeric fields parsed by this module.

    Raises:
        ValueError: if ``s`` is empty or contains anything but ``0``-``9``.
    """
    if not (s.isascii() and s.isdigit()):
        raise ValueError("Invalid digits")
    return int(s)


def _split_nextchar(
    s: str, chars: str, start: int = 0, end: int = -1
) -> tuple[str, str | None, str]:
    """Split ``s`` around the first separator from ``chars`` that it contains.

    The candidates in ``chars`` are tried in the order given (not by position
    in ``s``), and only ``s[start:end]`` is searched. With the default
    ``end=-1`` the final character of ``s`` is never considered a separator.

    Returns:
        ``(before, separator, after)`` when a separator is found,
        otherwise ``(s, None, "")``.
    """
    for c in chars:
        if (idx := s.find(c, start, end)) != -1:
            return (s[:idx], c, s[idx + 1 :])
    return (s, None, "")


# True for the characters accepted between the date and the time part
_is_sep = " Tt".__contains__


def _offset_from_iso(s: str) -> int:
    """Parse an unsigned ISO 8601 UTC offset into a number of seconds.

    Accepted layouts are ``HH:MM``, ``HHMM``, ``HH``, ``HH:MM:SS`` and
    ``HHMMSS``. The sign is handled by the caller. The tens digit of the
    minutes and seconds must be below 6; the hours are not range-checked here.

    Raises:
        ValueError: if ``s`` matches none of the layouts or contains
            non-digit characters in a numeric field.
    """
    minutes = 0
    seconds = 0
    if len(s) == 5 and s[2] == ":" and s[3] < "6":  # most common: HH:MM
        hours = _int_strict(s[:2])
        minutes = _int_strict(s[3:])
    elif len(s) == 4 and s[2] < "6":  # HHMM
        hours = _int_strict(s[:2])
        minutes = _int_strict(s[2:])
    elif len(s) == 2:  # HH
        hours = _int_strict(s)
    elif (
        len(s) == 8
        and s[2] == ":"
        and s[5] == ":"
        and s[3] < "6"
        and s[6] < "6"
    ):  # HH:MM:SS
        hours = _int_strict(s[:2])
        minutes = _int_strict(s[3:5])
        seconds = _int_strict(s[6:])
    elif len(s) == 6 and s[2] < "6" and s[4] < "6":  # HHMMSS
        hours = _int_strict(s[:2])
        minutes = _int_strict(s[2:4])
        seconds = _int_strict(s[4:])
    else:
        raise ValueError("Invalid offset format")
    return hours * 3600 + minutes * 60 + seconds


def datetime_from_iso(s: str) -> tuple[_datetime, Nanos]:
    """Parse an ISO 8601 date-time without offset.

    The date may be in extended (``YYYY-MM-DD``) or basic (``YYYYMMDD``)
    format, followed by one of the separators ``" "``, ``"T"`` or ``"t"``
    and a time.

    Returns:
        A naive ``datetime`` (whole seconds only) and the sub-second part
        in nanoseconds.

    Raises:
        ValueError: if the string cannot be parsed.
    """
    if len(s) < 11 or "W" in s or not s.isascii():
        _parse_err(s)
    # OPTIMIZE: the happy path can be faster
    try:
        if _is_sep(s[10]):  # date in extended format
            rest, date = s[11:], _date.fromisoformat(s[:10])
        elif _is_sep(s[8]):  # date in basic format
            rest, date = s[9:], _date_from_iso_basic(s[:8])
        else:
            _parse_err(s)
        time, nanos = time_from_iso(rest)
    except ValueError:
        _parse_err(s)
    return _datetime.combine(date, time), nanos


def offset_dt_from_iso(s: str) -> tuple[_datetime, Nanos]:
    """Parse an ISO 8601 date-time that carries a UTC offset (or ``Z``).

    A trailing bracketed timezone ID is parsed but ignored.

    Returns:
        An aware ``datetime`` (as returned by ``check_utc_bounds``) and the
        sub-second part in nanoseconds.

    Raises:
        ValueError: if the string cannot be parsed or has no offset.
    """
    if len(s) < 11 or "W" in s[:11] or not s.isascii():
        _parse_err(s)
    try:
        if _is_sep(s[10]):  # date in extended format
            rest, date = s[11:], _date.fromisoformat(s[:10])
        elif _is_sep(s[8]):  # date in basic format
            rest, date = s[9:], _date_from_iso_basic(s[:8])
        else:
            _parse_err(s)
        time, nanos, offset, _ = _time_offset_tz_from_iso(rest)
        if offset is None:
            raise ValueError("Missing offset")
        elif offset == "Z":
            tzinfo = UTC
        else:
            assert isinstance(offset, _timezone)
            tzinfo = offset
        return (
            check_utc_bounds(_datetime.combine(date, time, tzinfo)),
            nanos,
        )
    except ValueError:
        _parse_err(s)


def zdt_from_iso(s: str) -> tuple[_datetime, Nanos, TimeZone]:
    """Parse an ISO 8601 date-time with a mandatory bracketed timezone ID.

    How the local time is resolved depends on the offset in the string:

    - no offset: ``resolve_ambiguity(..., "compatible")`` picks the instant;
    - ``Z``: the date-time is taken as UTC and converted to the zone's
      offset at that instant;
    - explicit offset: it must equal the zone's offset at that instant.

    Returns:
        The ``datetime`` (whole seconds only), the sub-second part in
        nanoseconds, and the ``TimeZone`` returned by ``get_tz``.

    Raises:
        ValueError: if the string cannot be parsed or has no timezone ID.
        InvalidOffsetError: if an explicit offset disagrees with the zone.
    """
    if len(s) < 11 or "W" in s[:11] or not s.isascii():
        _parse_err(s)
    try:
        if _is_sep(s[10]):  # date in extended format
            rest, date = s[11:], _date.fromisoformat(s[:10])
        elif _is_sep(s[8]):  # date in basic format
            rest, date = s[9:], _date_from_iso_basic(s[:8])
        else:
            _parse_err(s)
        time, nanos, offset, tzid = _time_offset_tz_from_iso(rest)
    except ValueError:
        _parse_err(s)

    if tzid is None:
        _parse_err(s)

    tz = get_tz(tzid)

    if offset is None:
        dt = resolve_ambiguity(_datetime.combine(date, time), tz, "compatible")
    elif offset == "Z":
        utc_dt = _datetime.combine(date, time, UTC)
        dt = utc_dt.astimezone(
            mk_fixed_tzinfo(tz.offset_for_instant(int(utc_dt.timestamp())))
        )
    else:
        # NOTE(review): the nesting of the checks below under this branch was
        # reconstructed from a whitespace-collapsed listing -- confirm.
        assert isinstance(offset, _timezone)
        dt = _datetime.combine(date, time, offset)
        # Raise an exception if instant is out of range
        dt.astimezone(UTC)
        # Ensure the offset is correct for the given instant
        expected_offset = tz.offset_for_instant(int(dt.timestamp()))
        # NOTE: mypy doesn't know utcoffset() can never return None here
        if dt.utcoffset().total_seconds() != expected_offset:  # type: ignore[union-attr]
            raise InvalidOffsetError()
    return (dt, nanos, tz)


def time_from_iso(s_orig: str) -> tuple[_time, Nanos]:
    """Parse an ISO 8601 time of day with optional fractional seconds.

    The decimal separator (``.`` or ``,``) is only looked for at indices
    6 to 8, i.e. directly after the seconds of a basic or extended time.

    Returns:
        The ``time`` (whole seconds only) and the fraction in nanoseconds
        (``0`` when there is no separator).

    Raises:
        ValueError: if the string cannot be parsed.
    """
    s, sep, nanos_raw = _split_nextchar(s_orig, ".,", 6, 9)
    try:
        return (
            _time_from_iso_nofrac(s),
            _parse_nanos(nanos_raw) if sep else 0,
        )
    except ValueError:
        _parse_err(s_orig)


# Parse the time, UTC offset, and timezone ID
def _time_offset_tz_from_iso(
    s: str,
) -> tuple[_time, Nanos, _timezone | Literal["Z"] | None, SafeTzId | None]:
    """Split the part after the date into time, offset and timezone ID.

    Returns:
        ``(time, nanos, offset, tz)`` where ``offset`` is ``"Z"``, a fixed
        tzinfo built by ``mk_fixed_tzinfo``, or ``None`` when absent, and
        ``tz`` is the validated bracketed ID or ``None`` when absent.

    Raises:
        ValueError: if any component is malformed (including a closing
            bracket without a matching opening bracket).
    """
    # ditch the bracketted timezone (if present)
    if s.endswith("]"):
        # NOTE: sorry for the unicode escape sequences. Literal brackets
        # break my LSP's indentation detection. \x5b is open bracket '['
        s, tz_raw = s[:-1].rsplit("\x5b", 1)
        tz = validate_tzid(tz_raw)
    else:
        tz = None

    # determine the offset
    offset: Literal["Z"] | _timezone | None
    if s.endswith(("Z", "z")):
        s_time = s[:-1]
        offset = "Z"
    else:
        s_time, sign, s_offset = _split_nextchar(s, "+-")
        if sign is None:
            offset = None
        else:
            offset_secs = _offset_from_iso(s_offset)
            if sign == "-":
                offset_secs = -offset_secs
            offset = mk_fixed_tzinfo(offset_secs)

    time, nanos = time_from_iso(s_time)
    return (time, nanos, offset, tz)


def yearmonth_from_iso(s: str) -> _date:
    """Parse ``YYYY-MM`` or ``YYYYMM`` into a date on the 1st of that month.

    Raises:
        ValueError: if the string has another layout, contains non-digit
            characters in a numeric field, or names an invalid year/month.
    """
    if not s.isascii():
        _parse_err(s)
    try:
        if len(s) == 7 and s[4] == "-":
            year, month = _int_strict(s[:4]), _int_strict(s[5:])
        elif len(s) == 6:
            year, month = _int_strict(s[:4]), _int_strict(s[4:])
        else:
            _parse_err(s)
        return _date(year, month, 1)
    except ValueError:
        _parse_err(s)


def monthday_from_iso(s: str) -> _date:
    """Parse ``--MM-DD`` or ``--MMDD`` into a date in ``DUMMY_LEAP_YEAR``.

    Raises:
        ValueError: if the string has another layout, contains non-digit
            characters in a numeric field, or names an invalid month/day.
    """
    if not (s.startswith("--") and s.isascii()):
        _parse_err(s)
    try:
        if len(s) == 7 and s[4] == "-":
            month, day = _int_strict(s[2:4]), _int_strict(s[5:])
        elif len(s) == 6:
            month, day = _int_strict(s[2:4]), _int_strict(s[4:])
        else:
            _parse_err(s)
        return _date(DUMMY_LEAP_YEAR, month, day)
    except ValueError:
        _parse_err(s)


# The ISO parsing functions were improved in Python 3.11,
# so we use them if available.
if sys.version_info >= (3, 11):
    _date_from_iso_basic = _date.fromisoformat

    def _time_from_iso_nofrac(s: str) -> _time:
        """Parse a time without fractional seconds (digits and colons only)."""
        # Compensate for a bug in CPython where times like "12:34:56:78" are
        # accepted as valid times. This is only fixed in Python 3.14+
        if s.count(":") > 2:
            raise ValueError()
        if all(map("0123456789:".__contains__, s)):
            return _time.fromisoformat(s)
        raise ValueError()

    def date_from_iso(s: str) -> _date:
        """Parse an ISO 8601 calendar date; week dates are rejected."""
        # prevent isoformat from parsing stuff we don't want it to
        if "W" in s or not s.isascii():
            _parse_err(s)
        try:
            return _date.fromisoformat(s)
        except ValueError:
            _parse_err(s)

else:  # pragma: no cover

    def _date_from_iso_basic(s: str, /) -> _date:
        """Parse a basic-format ``YYYYMMDD`` date by inserting the dashes."""
        return _date.fromisoformat(s[:4] + "-" + s[4:6] + "-" + s[6:8])

    def _time_from_iso_nofrac(s: str) -> _time:
        """Parse a time without fractional seconds (digits and colons only)."""
        # Compensate for the fact that Python's isoformat
        # doesn't support basic ISO 8601 formats
        if len(s) == 4:
            s = s[:2] + ":" + s[2:]
        elif len(s) == 6:
            s = s[:2] + ":" + s[2:4] + ":" + s[4:]
        if all(map("0123456789:".__contains__, s)):
            return _time.fromisoformat(s)
        raise ValueError()

    def date_from_iso(s: str) -> _date:
        """Parse an ISO 8601 calendar date in basic or extended format."""
        if not s.isascii():
            _parse_err(s)
        try:
            if len(s) == 8:
                return _date_from_iso_basic(s)
            return _date.fromisoformat(s)
        except ValueError:
            _parse_err(s)


# Lowercased RFC 2822 day names mapped to ISO weekday numbers (Monday = 1)
_RFC2822_WEEKDAY_TO_ISO = {
    "mon": 1,
    "tue": 2,
    "wed": 3,
    "thu": 4,
    "fri": 5,
    "sat": 6,
    "sun": 7,
}
# Lowercased RFC 2822 month names mapped to month numbers
_RFC2822_MONTH_NAMES = {
    "jan": 1,
    "feb": 2,
    "mar": 3,
    "apr": 4,
    "may": 5,
    "jun": 6,
    "jul": 7,
    "aug": 8,
    "sep": 9,
    "oct": 10,
    "nov": 11,
    "dec": 12,
}
# Title-cased names for formatting; the weekday list is 0-indexed from Monday
WEEKDAY_TO_RFC2822 = [s.title() for s in _RFC2822_WEEKDAY_TO_ISO]
MONTH_TO_RFC2822 = [s.title() for s in _RFC2822_MONTH_NAMES]
MONTH_TO_RFC2822.insert(0, "")  # 1-indexed
# Named zones and their offsets from UTC in whole hours
_RFC2822_ZONES = {
    "EST": -5,
    "EDT": -4,
    "CST": -6,
    "CDT": -5,
    "MST": -7,
    "MDT": -6,
    "PST": -8,
    "PDT": -7,
    "UT": 0,
    "GMT": 0,
}


def parse_rfc2822(s: str) -> _datetime:
    """Parse an RFC 2822 date-time such as ``Mon, 23 Jan 2023 12:00:00 +0100``.

    The weekday is optional, but when present it must match the date.
    Two-digit years below 50 are read as 20xx and the others as 19xx;
    three-digit years have 1900 added. The zone is either a ``+HHMM`` /
    ``-HHMM`` offset or an alphabetic name; unknown names count as UTC.

    Returns:
        The aware ``datetime`` as returned by ``check_utc_bounds``.

    Raises:
        ValueError: if the string cannot be parsed.
    """
    # Technically, only tab, space and CRLF are allowed in RFC2822,
    # but we allow any ASCII whitespace
    if not s.isascii():
        _parse_err(s)

    # Parse the weekday
    try:
        first, second, *parts = s.split()
        if first.isdigit():
            iso_weekday = None
            parts = [first, second, *parts]
        else:
            # Case: Mon, 23 Jan
            if len(first) == 4 and first[3] == ",":
                weekday_raw = first[:3]
                parts = [second, *parts]
            # Case: Mon , 23 Jan
            elif len(first) == 3 and second == ",":
                weekday_raw = first
            # Case: Mon ,23 Jan
            elif len(first) == 3 and second.startswith(","):
                weekday_raw = first
                parts = [second[1:], *parts]
            # Case: Mon,23 Jan
            elif len(first) > 4 and first[3] == ",":
                weekday_raw = first[:3]
                parts = [first[4:], second, *parts]
            else:
                _parse_err(s)
            iso_weekday = _RFC2822_WEEKDAY_TO_ISO[weekday_raw.lower()]
    except (ValueError, KeyError):
        _parse_err(s)

    # Parse the date
    try:
        day_raw, month_raw, year_raw, *parts = parts
        if len(day_raw) > 2:
            _parse_err(s)
        day = _int_strict(day_raw)
        month = _RFC2822_MONTH_NAMES[month_raw.lower()]
        if len(year_raw) == 4:
            year = _int_strict(year_raw)
        elif len(year_raw) == 2:
            year = _int_strict(year_raw)
            if year < 50:
                year += 2000
            else:
                year += 1900
        elif len(year_raw) == 3:
            year = _int_strict(year_raw) + 1900
        else:
            _parse_err(s)
        date = _date(year, month, day)
    except (ValueError, KeyError):
        _parse_err(s)

    if iso_weekday and iso_weekday != date.isoweekday():
        _parse_err(s)

    # Parse the time
    try:
        # time components may be separated by whitespace
        *time_parts, offset_raw = parts
        time_raw = "".join(time_parts)
        if len(time_raw) == 5 and time_raw[2] == ":":
            time = _time(_int_strict(time_raw[:2]), _int_strict(time_raw[3:]))
        elif len(time_raw) == 8 and time_raw[2] == ":" and time_raw[5] == ":":
            time = _time(
                _int_strict(time_raw[:2]),
                _int_strict(time_raw[3:5]),
                _int_strict(time_raw[6:]),
            )
        else:
            _parse_err(s)
    except ValueError:
        _parse_err(s)

    # Parse the offset
    try:
        if offset_raw.startswith(("+", "-")) and len(offset_raw) == 5:
            sign = 1 if offset_raw[0] == "+" else -1
            offset = (
                _timedelta(
                    hours=_int_strict(offset_raw[1:3]),
                    minutes=_int_strict(offset_raw[3:5]),
                )
                * sign
            )
        elif offset_raw.isalpha():
            # According to the spec, unknown timezones should
            # just be treated at -0000 (UTC with unknown offset)
            offset = _timedelta(
                hours=_RFC2822_ZONES.get(offset_raw.upper(), 0)
            )
        else:
            _parse_err(s)
        tzinfo = _timezone(offset)
    except ValueError:
        _parse_err(s)

    return check_utc_bounds(_datetime.combine(date, time, tzinfo=tzinfo))


_MAX_TDELTA_DIGITS = 35  # consistent with Rust extension


def parse_timedelta_component(
    fullstr: str, exc: Exception
) -> tuple[str, int, Literal["H", "M", "S"]]:
    """Parse the leading ``<number><unit>`` component of a duration string.

    The unit is the first ``H``, ``M`` or ``S`` found in ``fullstr``.
    Everything before it must be ASCII digits; seconds may also carry a
    fraction of up to 9 digits after ``.`` or ``,``.

    Args:
        fullstr: the remaining duration text, e.g. ``"12H30M"``.
        exc: the exception instance to raise when the component is invalid.

    Returns:
        ``(rest, value, unit)``: the text after the unit, the parsed value
        (nanoseconds for ``S``, the plain integer for ``H`` and ``M``) and
        the unit character.

    Raises:
        The given ``exc`` when no unit is found or the number is malformed.
    """
    try:
        split_index, unit = next(
            (i, c) for i, c in enumerate(fullstr) if c in "HMS"
        )
    except StopIteration:
        raise exc

    raw, rest = fullstr[:split_index], fullstr[split_index + 1 :]

    if unit == "S":
        digits, sep, nanos_raw = _split_nextchar(raw, ".,")
        # isascii() is needed on top of isdigit(): the latter is also true
        # for characters such as superscripts, which int() cannot convert.
        if (
            len(digits) > _MAX_TDELTA_DIGITS
            or not (digits.isascii() and digits.isdigit())
            or len(nanos_raw) > 9
            or (sep and not (nanos_raw.isascii() and nanos_raw.isdigit()))
        ):
            raise exc
        value = int(digits) * 1_000_000_000 + int(nanos_raw.ljust(9, "0"))
    else:
        if len(raw) > _MAX_TDELTA_DIGITS or not (
            raw.isascii() and raw.isdigit()
        ):
            raise exc
        value = int(raw)

    return rest, value, cast(Literal["H", "M", "S"], unit)