Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡️ Speed up method isoparser._parse_isodate_common by 15% in python_modules/dagster/dagster/_vendored/dateutil/parser/isoparser.py #63

Open
wants to merge 1 commit into
base: codeflash/optimize-remove_none_recursively-2024-06-26T09.20.53
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 80 additions & 91 deletions python_modules/dagster/dagster/_vendored/dateutil/parser/isoparser.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,38 @@
# -*- coding: utf-8 -*-
"""
This module offers a parser for ISO-8601 strings
"""This module offers a parser for ISO-8601 strings

It is intended to support all valid date, time and datetime formats per the
ISO-8601 specification.

..versionadded:: 2.7.0
"""
from datetime import datetime, timedelta, time, date
import calendar

# CHANGED IN VENDORED VERSION
from .. import tz

import calendar
import re
from datetime import date, datetime, time, timedelta
from functools import wraps

import re
import six

# CHANGED IN VENDORED VERSION
from .. import tz

__all__ = ["isoparse", "isoparser"]


def _takes_ascii(f):
@wraps(f)
def func(self, str_in, *args, **kwargs):
# If it's a stream, read the whole thing
str_in = getattr(str_in, 'read', lambda: str_in)()
str_in = getattr(str_in, "read", lambda: str_in)()

# If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII
if isinstance(str_in, six.text_type):
# ASCII is the same in UTF-8
try:
str_in = str_in.encode('ascii')
str_in = str_in.encode("ascii")
except UnicodeEncodeError as e:
msg = 'ISO-8601 strings should contain only ASCII characters'
msg = "ISO-8601 strings should contain only ASCII characters"
six.raise_from(ValueError(msg), e)

return f(self, str_in, *args, **kwargs)
Expand All @@ -43,25 +42,22 @@ def func(self, str_in, *args, **kwargs):

class isoparser(object):
def __init__(self, sep=None):
"""
:param sep:
A single character that separates date and time portions. If
``None``, the parser will accept any single character.
For strict ISO-8601 adherence, pass ``'T'``.
""":param sep:
A single character that separates date and time portions. If
``None``, the parser will accept any single character.
For strict ISO-8601 adherence, pass ``'T'``.
"""
if sep is not None:
if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
raise ValueError('Separator must be a single, non-numeric ' +
'ASCII character')
if len(sep) != 1 or ord(sep) >= 128 or sep in "0123456789":
raise ValueError("Separator must be a single, non-numeric " + "ASCII character")

sep = sep.encode('ascii')
sep = sep.encode("ascii")

self._sep = sep

@_takes_ascii
def isoparse(self, dt_str):
"""
Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.
"""Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.

An ISO-8601 datetime string consists of a date portion, followed
optionally by a time portion - the date and time portions are separated
Expand Down Expand Up @@ -136,10 +132,10 @@ def isoparse(self, dt_str):
components, pos = self._parse_isodate(dt_str)

if len(dt_str) > pos:
if self._sep is None or dt_str[pos:pos + 1] == self._sep:
components += self._parse_isotime(dt_str[pos + 1:])
if self._sep is None or dt_str[pos : pos + 1] == self._sep:
components += self._parse_isotime(dt_str[pos + 1 :])
else:
raise ValueError('String contains unknown ISO components')
raise ValueError("String contains unknown ISO components")

if len(components) > 3 and components[3] == 24:
components[3] = 0
Expand All @@ -149,8 +145,7 @@ def isoparse(self, dt_str):

@_takes_ascii
def parse_isodate(self, datestr):
"""
Parse the date portion of an ISO string.
"""Parse the date portion of an ISO string.

:param datestr:
The string portion of an ISO string, without a separator
Expand All @@ -160,14 +155,14 @@ def parse_isodate(self, datestr):
"""
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {!r}'.format(datestr.decode('ascii')))
raise ValueError(
"String contains unknown ISO " + "components: {!r}".format(datestr.decode("ascii"))
)
return date(*components)

@_takes_ascii
def parse_isotime(self, timestr):
"""
Parse the time portion of an ISO string.
"""Parse the time portion of an ISO string.

:param timestr:
The time portion of an ISO string, without a separator
Expand All @@ -182,8 +177,7 @@ def parse_isotime(self, timestr):

@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
"""
Parse a valid ISO time zone string.
"""Parse a valid ISO time zone string.

See :func:`isoparser.isoparse` for details on supported formats.

Expand All @@ -201,9 +195,9 @@ def parse_tzstr(self, tzstr, zero_as_utc=True):
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)

# Constants
# NOTE(review): each name below is bound twice with an identical value (once
# with single quotes, once with double quotes); the second binding simply
# replaces the first, so the duplicates are redundant.
# Byte that separates the year, month and day fields of a date.
_DATE_SEP = b'-'
# Byte that separates the hour, minute and second fields of a time.
_TIME_SEP = b':'
# Matches a "." or "," followed by one or more digits; group 1 is the digits.
_FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
_DATE_SEP = b"-"
_TIME_SEP = b":"
_FRACTION_REGEX = re.compile(b"[\\.,]([0-9]+)")

def _parse_isodate(self, dt_str):
try:
Expand All @@ -213,92 +207,87 @@ def _parse_isodate(self, dt_str):

def _parse_isodate_common(self, dt_str):
len_str = len(dt_str)
components = [1, 1, 1]

if len_str < 4:
raise ValueError('ISO string too short')
raise ValueError("ISO string too short")

# Year
components[0] = int(dt_str[0:4])
y = int(dt_str[0:4])
pos = 4

if pos >= len_str:
return components, pos
return [y, 1, 1], pos

has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
if has_sep:
pos += 1
has_sep = dt_str[pos] == self._sep
pos += has_sep

# Month
if len_str - pos < 2:
raise ValueError('Invalid common month')
raise ValueError("Invalid common month")

components[1] = int(dt_str[pos:pos + 2])
# Month
m = int(dt_str[pos : pos + 2])
pos += 2

if pos >= len_str:
if has_sep:
return components, pos
else:
raise ValueError('Invalid ISO format')
return [y, m, 1], pos

if has_sep:
if dt_str[pos:pos + 1] != self._DATE_SEP:
raise ValueError('Invalid separator in ISO string')
if dt_str[pos] != self._sep:
raise ValueError("Invalid separator in ISO string")
pos += 1

# Day
if len_str - pos < 2:
raise ValueError('Invalid common day')
components[2] = int(dt_str[pos:pos + 2])
return components, pos + 2
raise ValueError("Invalid common day")

# Day
d = int(dt_str[pos : pos + 2])

return [y, m, d], pos + 2

def _parse_isodate_uncommon(self, dt_str):
if len(dt_str) < 4:
raise ValueError('ISO string too short')
raise ValueError("ISO string too short")

# All ISO formats start with the year
year = int(dt_str[0:4])

has_sep = dt_str[4:5] == self._DATE_SEP

pos = 4 + has_sep # Skip '-' if it's there
if dt_str[pos:pos + 1] == b'W':
pos = 4 + has_sep # Skip '-' if it's there
if dt_str[pos : pos + 1] == b"W":
# YYYY-?Www-?D?
pos += 1
weekno = int(dt_str[pos:pos + 2])
weekno = int(dt_str[pos : pos + 2])
pos += 2

dayno = 1
if len(dt_str) > pos:
if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
raise ValueError('Inconsistent use of dash separator')
if (dt_str[pos : pos + 1] == self._DATE_SEP) != has_sep:
raise ValueError("Inconsistent use of dash separator")

pos += has_sep

dayno = int(dt_str[pos:pos + 1])
dayno = int(dt_str[pos : pos + 1])
pos += 1

base_date = self._calculate_weekdate(year, weekno, dayno)
else:
# YYYYDDD or YYYY-DDD
if len(dt_str) - pos < 3:
raise ValueError('Invalid ordinal day')
raise ValueError("Invalid ordinal day")

ordinal_day = int(dt_str[pos:pos + 3])
ordinal_day = int(dt_str[pos : pos + 3])
pos += 3

if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
raise ValueError('Invalid ordinal day' +
' {} for year {}'.format(ordinal_day, year))
raise ValueError("Invalid ordinal day" + f" {ordinal_day} for year {year}")

base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)

components = [base_date.year, base_date.month, base_date.day]
return components, pos

def _calculate_weekdate(self, year, week, day):
"""
Calculate the day of corresponding to the ISO year-week-day calendar.
"""Calculate the day of corresponding to the ISO year-week-day calendar.

This function is effectively the inverse of
:func:`datetime.date.isocalendar`.
Expand All @@ -316,13 +305,13 @@ def _calculate_weekdate(self, year, week, day):
Returns a :class:`datetime.date`
"""
if not 0 < week < 54:
raise ValueError('Invalid week: {}'.format(week))
raise ValueError(f"Invalid week: {week}")

if not 0 < day < 8: # Range is 1-7
raise ValueError('Invalid weekday: {}'.format(day))
if not 0 < day < 8: # Range is 1-7
raise ValueError(f"Invalid weekday: {day}")

# Get week 1 for the specific year:
jan_4 = date(year, 1, 4) # Week 1 always has January 4th in it
jan_4 = date(year, 1, 4) # Week 1 always has January 4th in it
week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)

# Now add the specific number of weeks and days to get what we want
Expand All @@ -336,30 +325,30 @@ def _parse_isotime(self, timestr):
comp = -1

if len_str < 2:
raise ValueError('ISO time too short')
raise ValueError("ISO time too short")

has_sep = False

while pos < len_str and comp < 5:
comp += 1

if timestr[pos:pos + 1] in b'-+Zz':
if timestr[pos : pos + 1] in b"-+Zz":
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break

if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
if comp == 1 and timestr[pos : pos + 1] == self._TIME_SEP:
has_sep = True
pos += 1
elif comp == 2 and has_sep:
if timestr[pos:pos+1] != self._TIME_SEP:
raise ValueError('Inconsistent use of colon separator')
if timestr[pos : pos + 1] != self._TIME_SEP:
raise ValueError("Inconsistent use of colon separator")
pos += 1

if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
components[comp] = int(timestr[pos : pos + 2])
pos += 2

if comp == 3:
Expand All @@ -369,47 +358,47 @@ def _parse_isotime(self, timestr):
continue

us_str = frac.group(1)[:6] # Truncate to microseconds
components[comp] = int(us_str) * 10**(6 - len(us_str))
components[comp] = int(us_str) * 10 ** (6 - len(us_str))
pos += len(frac.group())

if pos < len_str:
raise ValueError('Unused components in ISO string')
raise ValueError("Unused components in ISO string")

if components[0] == 24:
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
raise ValueError("Hour may only be 24 at 24:00:00.000")

return components

def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z' or tzstr == b'z':
if tzstr == b"Z" or tzstr == b"z":
return tz.UTC

if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
raise ValueError("Time zone offset must be 1, 3, 5 or 6 characters")

if tzstr[0:1] == b'-':
if tzstr[0:1] == b"-":
mult = -1
elif tzstr[0:1] == b'+':
elif tzstr[0:1] == b"+":
mult = 1
else:
raise ValueError('Time zone offset requires sign')
raise ValueError("Time zone offset requires sign")

hours = int(tzstr[1:3])
if len(tzstr) == 3:
minutes = 0
else:
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3) :])

if zero_as_utc and hours == 0 and minutes == 0:
return tz.UTC
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')
raise ValueError("Invalid minutes in time zone offset")

if hours > 23:
raise ValueError('Invalid hours in time zone offset')
raise ValueError("Invalid hours in time zone offset")

return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)

Expand Down