Skip to content

Commit f4b493f

Browse files
lopuhin authored and asadurski committed
faster pop_tz_offset_from_string, 2x faster dateparser.parse if date has no timezone info (scrapinghub#569)
* [WIP] faster pop_tz_offset_from_string

  When most strings don't have a tz offset, this is massively faster, as we avoid a loop over all timezones (around 800 of them). But it's possible to improve this.

* fix fast path check: use re.IGNORECASE as in timezone regexps

  word_is_tz is supposed to be case sensitive, so don't modify its behaviour.
1 parent 77f84c7 commit f4b493f

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

dateparser/timezone_parser.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -33,15 +33,17 @@ def __getinitargs__(self):
3333

3434

3535
def pop_tz_offset_from_string(date_string, as_offset=True):
36-
for name, info in _tz_offsets:
37-
timezone_re = info['regex']
38-
timezone_match = timezone_re.search(date_string)
39-
if timezone_match:
40-
start, stop = timezone_match.span()
41-
date_string = date_string[:start + 1] + date_string[stop:]
42-
return date_string, StaticTzInfo(name, info['offset']) if as_offset else name
43-
else:
44-
return date_string, None
36+
if _search_regex_ignorecase.search(date_string):
37+
for name, info in _tz_offsets:
38+
timezone_re = info['regex']
39+
timezone_match = timezone_re.search(date_string)
40+
if timezone_match:
41+
start, stop = timezone_match.span()
42+
date_string = date_string[:start + 1] + date_string[stop:]
43+
return (
44+
date_string,
45+
StaticTzInfo(name, info['offset']) if as_offset else name)
46+
return date_string, None
4547

4648

4749
def word_is_tz(word):
@@ -85,4 +87,6 @@ def get_local_tz_offset():
8587
_search_regex_parts = []
8688
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
8789
_search_regex = re.compile('|'.join(_search_regex_parts))
90+
_search_regex_ignorecase = re.compile(
91+
'|'.join(_search_regex_parts), re.IGNORECASE)
8892
local_tz_offset = get_local_tz_offset()

0 commit comments

Comments
 (0)