Skip to content

Commit

Permalink
Merge branch 'Precognize-standalone_period_parse_serialize'
Browse files Browse the repository at this point in the history
  • Loading branch information
darthbear committed Feb 12, 2023
2 parents 7ab4d34 + 80a5067 commit fe5b64b
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 133 deletions.
86 changes: 6 additions & 80 deletions pyhocon/config_parser.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
import codecs
import contextlib
import copy
import itertools
import logging
import os
import re
import socket
import sys
from datetime import timedelta

import pyparsing

from pyparsing import (Forward, Group, Keyword, Literal, Optional,
ParserElement, ParseSyntaxException, QuotedString,
Regex, SkipTo, StringEnd, Suppress, TokenConverter,
Word, ZeroOrMore, alphanums, alphas8bit, col, lineno,
replaceWith, Or, nums, White, WordEnd)
replaceWith)

from pyhocon.period_parser import get_period_expr

# Fix deepcopy issue with pyparsing
if sys.version_info >= (3, 8):
Expand Down Expand Up @@ -108,22 +107,8 @@ class STR_SUBSTITUTION(object):
pass


def period(period_value, period_unit):
try:
from dateutil.relativedelta import relativedelta as period_impl
except Exception:
from datetime import timedelta as period_impl

if period_unit == 'nanoseconds':
period_unit = 'microseconds'
period_value = int(period_value / 1000)

arguments = dict(zip((period_unit,), (period_value,)))

if period_unit == 'milliseconds':
return timedelta(**arguments)

return period_impl(**arguments)
U_KEY_SEP = unicode('.')
U_KEY_FMT = unicode('"{0}"')


U_KEY_SEP = unicode('.')
Expand Down Expand Up @@ -246,42 +231,6 @@ class ConfigParser(object):
'\\"': '"',
}

period_type_map = {
'nanoseconds': ['ns', 'nano', 'nanos', 'nanosecond', 'nanoseconds'],

'microseconds': ['us', 'micro', 'micros', 'microsecond', 'microseconds'],
'milliseconds': ['ms', 'milli', 'millis', 'millisecond', 'milliseconds'],
'seconds': ['s', 'second', 'seconds'],
'minutes': ['m', 'minute', 'minutes'],
'hours': ['h', 'hour', 'hours'],
'weeks': ['w', 'week', 'weeks'],
'days': ['d', 'day', 'days'],

}

optional_period_type_map = {
'months': ['mo', 'month', 'months'], # 'm' from hocon spec removed. conflicts with minutes syntax.
'years': ['y', 'year', 'years']
}

supported_period_map = None

@classmethod
def get_supported_period_type_map(cls):
if cls.supported_period_map is None:
cls.supported_period_map = {}
cls.supported_period_map.update(cls.period_type_map)

try:
from dateutil import relativedelta

if relativedelta is not None:
cls.supported_period_map.update(cls.optional_period_type_map)
except Exception:
pass

return cls.supported_period_map

@classmethod
def parse(cls, content, basedir=None, resolve=True, unresolved_value=DEFAULT_SUBSTITUTION):
"""parse a HOCON content
Expand Down Expand Up @@ -320,16 +269,6 @@ def convert_number(tokens):
except ValueError:
return float(n)

def convert_period(tokens):
period_value = int(tokens.value)
period_identifier = tokens.unit

period_unit = next((single_unit for single_unit, values
in cls.get_supported_period_type_map().items()
if period_identifier in values))

return period(period_value, period_unit)

# ${path} or ${?path} for optional substitution
SUBSTITUTION_PATTERN = r"\$\{(?P<optional>\?)?(?P<variable>[^}]+)\}(?P<ws>[ \t]*)"

Expand Down Expand Up @@ -457,19 +396,6 @@ def set_default_white_spaces():
number_expr = Regex(r'[+-]?(\d*\.\d+|\d+(\.\d+)?)([eE][+\-]?\d+)?(?=$|[ \t]*([\$\}\],#\n\r]|//))',
re.DOTALL).setParseAction(convert_number)

# Flatten the list of lists with unit strings.
period_types = list(itertools.chain(*cls.get_supported_period_type_map().values()))
# `Or()` tries to match the longest expression if more expressions
# are matching. We employ this to match e.g.: 'weeks' so that we
# don't end up with 'w' and 'eeks'. Note that 'weeks' but also 'w'
# are valid unit identifiers.
# Allow only spaces as a valid separator between value and unit.
# E.g. \t as a separator is invalid: '10<TAB>weeks'.
period_expr = (
Word(nums)('value') + ZeroOrMore(White(ws=' ')).suppress() + Or(period_types)('unit') + WordEnd(
alphanums).suppress()
).setParseAction(convert_period)

# multi line string using """
# Using fix described in http://pyparsing.wikispaces.com/share/view/3778969
multiline_string = Regex('""".*?"*"""', re.DOTALL | re.UNICODE).setParseAction(parse_multi_string)
Expand All @@ -485,7 +411,7 @@ def set_default_white_spaces():
substitution_expr = Regex(r'[ \t]*\$\{[^\}]+\}[ \t]*').setParseAction(create_substitution)
string_expr = multiline_string | quoted_string | unquoted_string

value_expr = period_expr | number_expr | true_expr | false_expr | null_expr | string_expr
value_expr = get_period_expr() | number_expr | true_expr | false_expr | null_expr | string_expr

include_content = (
quoted_string | ((Keyword('url') | Keyword('file') | Keyword('package')) - Literal(
Expand Down
61 changes: 9 additions & 52 deletions pyhocon/converter.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import json
import re
import sys
from datetime import timedelta

from pyhocon import ConfigFactory
from pyhocon.config_tree import ConfigQuotedString
from pyhocon.config_tree import ConfigSubstitution
from pyhocon.config_tree import ConfigTree
from pyhocon.config_tree import ConfigValues
from pyhocon.config_tree import NoneValue

from pyhocon.period_serializer import timedelta_to_str, is_timedelta_like, timedelta_to_hocon

try:
basestring
Expand Down Expand Up @@ -61,8 +60,8 @@ def to_json(cls, config, compact=False, indent=2, level=0):
)
lines += ',\n'.join(bet_lines)
lines += '\n{indent}]'.format(indent=''.rjust(level * indent, ' '))
elif cls._is_timedelta_like(config):
lines += cls._timedelta_to_str(config)
elif is_timedelta_like(config):
lines += timedelta_to_str(config)
elif isinstance(config, basestring):
lines = json.dumps(config, ensure_ascii=False)
elif config is None or isinstance(config, NoneValue):
Expand Down Expand Up @@ -138,8 +137,8 @@ def to_hocon(cls, config, compact=False, indent=2, level=0):
lines = '"""{value}"""'.format(value=config.value) # multilines
else:
lines = '"{value}"'.format(value=cls._escape_string(config.value))
elif cls._is_timedelta_like(config):
lines += cls._timedelta_to_hocon(config)
elif is_timedelta_like(config):
lines += timedelta_to_hocon(config)
elif config is None or isinstance(config, NoneValue):
lines = 'null'
elif config is True:
Expand Down Expand Up @@ -181,8 +180,8 @@ def to_yaml(cls, config, compact=False, indent=2, level=0):
bet_lines.append('{indent}- {value}'.format(indent=''.rjust(level * indent, ' '),
value=cls.to_yaml(item, compact, indent, level + 1)))
lines += '\n'.join(bet_lines)
elif cls._is_timedelta_like(config):
lines += cls._timedelta_to_str(config)
elif is_timedelta_like(config):
lines += timedelta_to_str(config)
elif isinstance(config, basestring):
# if it contains a \n then it's multiline
lines = config.split('\n')
Expand Down Expand Up @@ -223,8 +222,8 @@ def escape_value(value):
for index, item in enumerate(config):
if item is not None:
lines.append(cls.to_properties(item, compact, indent, stripped_key_stack + [str(index)]))
elif cls._is_timedelta_like(config):
lines.append('.'.join(stripped_key_stack) + ' = ' + cls._timedelta_to_str(config))
elif is_timedelta_like(config):
lines.append('.'.join(stripped_key_stack) + ' = ' + timedelta_to_str(config))
elif isinstance(config, basestring):
lines.append('.'.join(stripped_key_stack) + ' = ' + escape_value(config))
elif config is True:
Expand Down Expand Up @@ -292,45 +291,3 @@ def _escape_match(cls, match):
def _escape_string(cls, string):
return re.sub(r'[\x00-\x1F"\\]', cls._escape_match, string)

@classmethod
def _is_timedelta_like(cls, config):
return isinstance(config, timedelta) or relativedelta is not None and isinstance(config, relativedelta)

@classmethod
def _timedelta_to_str(cls, config):
if isinstance(config, relativedelta):
time_delta = cls._relative_delta_to_timedelta(config)
else:
time_delta = config
return str(int(time_delta.total_seconds() * 1000))

@classmethod
def _timedelta_to_hocon(cls, config):
"""
:type config: timedelta
"""
if relativedelta is not None and isinstance(config, relativedelta):
if config.hours > 0:
return str(config.hours) + ' hours'
elif config.minutes > 0:
return str(config.minutes) + ' minutes'

if config.days > 0:
return str(config.days) + ' days'
elif config.seconds > 0:
return str(config.seconds) + ' seconds'
elif config.microseconds > 0:
return str(config.microseconds) + ' microseconds'
else:
return '0 seconds'

@classmethod
def _relative_delta_to_timedelta(cls, relative_delta):
"""
:type relative_delta: relativedelta
"""
return timedelta(days=relative_delta.days,
hours=relative_delta.hours,
minutes=relative_delta.minutes,
seconds=relative_delta.seconds,
microseconds=relative_delta.microseconds)
73 changes: 73 additions & 0 deletions pyhocon/period_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import itertools
from datetime import timedelta

from pyparsing import (Word, ZeroOrMore, alphanums, Or, nums, White, WordEnd)

# Canonical unit name -> every spelling accepted for that unit in a period
# literal. The canonical name is later used as the keyword argument when the
# duration object is built.
period_type_map = {
    'nanoseconds': ['ns', 'nano', 'nanos', 'nanosecond', 'nanoseconds'],
    'microseconds': ['us', 'micro', 'micros', 'microsecond', 'microseconds'],
    'milliseconds': ['ms', 'milli', 'millis', 'millisecond', 'milliseconds'],
    'seconds': ['s', 'second', 'seconds'],
    'minutes': ['m', 'minute', 'minutes'],
    'hours': ['h', 'hour', 'hours'],
    'weeks': ['w', 'week', 'weeks'],
    'days': ['d', 'day', 'days'],
}

# Units that are only registered when python-dateutil can be imported.
# 'm' from hocon spec removed. conflicts with minutes syntax.
optional_period_type_map = {
    'months': ['mo', 'month', 'months'],
    'years': ['y', 'year', 'years'],
}

# Pick the class used to build durations: relativedelta when dateutil is
# installed (which also enables the optional units), plain timedelta otherwise.
try:
    from dateutil.relativedelta import relativedelta as period_impl
except ImportError:
    period_impl = timedelta
else:
    period_type_map.update(optional_period_type_map)


def convert_period(tokens):
    """Parse action: turn the matched ``value``/``unit`` tokens into a duration.

    :param tokens: parse results exposing ``value`` (digits) and ``unit``
        (one of the spellings listed in ``period_type_map``).
    :return: whatever ``period`` builds for the canonical unit.
    :raises StopIteration: if the unit spelling is not present in
        ``period_type_map``.
    """
    amount = int(tokens.value)
    spelling = tokens.unit

    # Lazily scan the map for the canonical unit that lists this spelling.
    matching_units = (canonical for canonical, spellings in period_type_map.items()
                      if spelling in spellings)
    canonical_unit = next(matching_units)

    return period(amount, canonical_unit)


def period(period_value, period_unit):
    """Build a duration object for ``period_value`` expressed in ``period_unit``.

    :param period_value: amount of the unit (the parser passes a non-negative int).
    :param period_unit: canonical unit name, i.e. a key of ``period_type_map``.
    :return: a ``timedelta`` for milliseconds, otherwise an instance of
        ``period_impl`` (``relativedelta`` when dateutil is installed, else
        ``timedelta``).
    """
    if period_unit == 'nanoseconds':
        # Neither backend stores nanoseconds, so reduce to whole microseconds,
        # truncating toward zero.
        period_unit = 'microseconds'
        if isinstance(period_value, int):
            # Pure integer arithmetic: ``int(value / 1000)`` goes through a
            # float and silently loses precision above 2**53.
            whole = abs(period_value) // 1000
            period_value = -whole if period_value < 0 else whole
        else:
            period_value = int(period_value / 1000)

    if period_unit == 'milliseconds':
        # Always built as a timedelta, regardless of the configured backend
        # (presumably because relativedelta has no ``milliseconds`` keyword --
        # TODO confirm).
        return timedelta(milliseconds=period_value)

    return period_impl(**{period_unit: period_value})


def get_period_expr():
    """Build the pyparsing expression that recognises a period literal.

    The expression matches ``<digits><optional spaces><unit>`` and converts
    the match through ``convert_period``.

    :return: a new parser element on every call.
    """
    # Flatten the per-unit spelling lists into one list of unit strings.
    unit_spellings = list(itertools.chain.from_iterable(period_type_map.values()))

    amount = Word(nums)('value')
    # Allow only spaces as a valid separator between value and unit.
    # E.g. \t as a separator is invalid: '10<TAB>weeks'.
    separator = ZeroOrMore(White(ws=' ')).suppress()
    # `Or()` tries to match the longest expression if more expressions
    # are matching. We employ this to match e.g.: 'weeks' so that we
    # don't end up with 'w' and 'eeks'. Note that 'weeks' but also 'w'
    # are valid unit identifiers.
    unit = Or(unit_spellings)('unit')
    # The unit must not be immediately followed by another alphanumeric char.
    unit_boundary = WordEnd(alphanums).suppress()

    period_expr = amount + separator + unit + unit_boundary
    return period_expr.setParseAction(convert_period)


def parse_period(content):
    """Parse a standalone period string such as ``'10 seconds'``.

    The whole of ``content`` must be a period (``parseAll=True``); otherwise
    pyparsing raises its parse exception.

    :param content: text holding exactly one period literal.
    :return: the duration object produced by ``convert_period``.
    """
    parsed_tokens = get_period_expr().parseString(content, parseAll=True)
    return parsed_tokens[0]
45 changes: 45 additions & 0 deletions pyhocon/period_serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import timedelta

try:
from dateutil.relativedelta import relativedelta
except Exception:
relativedelta = None


def is_timedelta_like(config):
    """Tell whether ``config`` is a duration value.

    :param config: any value.
    :return: ``True`` for a ``timedelta``, or for a ``relativedelta`` when
        dateutil was importable; ``False`` otherwise.
    """
    if isinstance(config, timedelta):
        return True
    if relativedelta is None:
        # dateutil is not installed, so there is no second type to check.
        return False
    return isinstance(config, relativedelta)


def timedelta_to_hocon(config):
    """Render a duration as a HOCON period literal, e.g. ``'90 minutes'``.

    The duration is written with the largest unit that represents it exactly,
    so no part of the value is dropped: ``timedelta(milliseconds=1500)`` gives
    ``'1500000 microseconds'`` rather than ``'1 seconds'``. Values that consist
    of a single unit keep the output they always had (``'3 days'``,
    ``'45 seconds'``, ...).

    Units used:

    * ``timedelta``: days, seconds, microseconds.
    * ``relativedelta``: years/months when the calendar part is non-zero,
      otherwise days, hours, minutes, seconds, microseconds.

    A ``relativedelta`` mixing a calendar part (years/months) with a clock part
    cannot be written as one HOCON period; only the calendar part is emitted.

    :type config: timedelta|relativedelta
    :return: ``'<amount> <unit>'``; ``'0 seconds'`` for an empty duration.
    """
    second = 10 ** 6  # microseconds per second

    if relativedelta is not None and isinstance(config, relativedelta):
        # relativedelta normalises its fields (e.g. minutes=90 is stored as
        # hours=1, minutes=30), so the fields have to be recombined.
        calendar_months = config.years * 12 + config.months
        if calendar_months:
            if calendar_months % 12 == 0:
                return str(calendar_months // 12) + ' years'
            return str(calendar_months) + ' months'

        total = ((((config.days * 24 + config.hours) * 60 + config.minutes) * 60
                  + config.seconds) * second + config.microseconds)
        units = (('days', 86400 * second),
                 ('hours', 3600 * second),
                 ('minutes', 60 * second),
                 ('seconds', second),
                 ('microseconds', 1))
    else:
        # Recombine days/seconds/microseconds so that compound and negative
        # timedeltas (stored as days=-1, seconds=...) are rendered faithfully.
        total = (config.days * 86400 + config.seconds) * second + config.microseconds
        units = (('days', 86400 * second),
                 ('seconds', second),
                 ('microseconds', 1))

    if total == 0:
        return '0 seconds'

    for unit_name, unit_size in units:
        if total % unit_size == 0:
            return str(total // unit_size) + ' ' + unit_name

    # Only reachable if the fields were not integers.
    return str(total) + ' microseconds'


def relative_delta_to_timedelta(relative_delta):
    """Convert the day-and-smaller fields of a relativedelta to a timedelta.

    Only ``days``, ``hours``, ``minutes``, ``seconds`` and ``microseconds``
    are read; any other field of ``relative_delta`` (such as years or months)
    is not taken into account.

    :type relative_delta: relativedelta
    :rtype: timedelta
    """
    copied_fields = ('days', 'hours', 'minutes', 'seconds', 'microseconds')
    keyword_args = {name: getattr(relative_delta, name) for name in copied_fields}
    return timedelta(**keyword_args)


def timedelta_to_str(config):
    """Render a duration as a string holding its whole number of milliseconds.

    :param config: a ``timedelta``, or a ``relativedelta`` when dateutil is
        installed (converted through ``relative_delta_to_timedelta``).
    :return: decimal string of the duration in milliseconds, truncated
        toward zero.
    """
    # ``relativedelta`` is None when dateutil is missing; isinstance() would
    # raise TypeError on it, so check for that first.
    if relativedelta is not None and isinstance(config, relativedelta):
        time_delta = relative_delta_to_timedelta(config)
    else:
        time_delta = config

    # Integer arithmetic on the stored fields: going through the float
    # ``total_seconds() * 1000`` can land just below the true value
    # (1001 ms came out as '1000').
    total_microseconds = ((time_delta.days * 86400 + time_delta.seconds) * 10 ** 6
                          + time_delta.microseconds)
    milliseconds = abs(total_microseconds) // 1000
    if total_microseconds < 0:
        milliseconds = -milliseconds
    return str(milliseconds)
2 changes: 1 addition & 1 deletion tests/test_config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2650,5 +2650,5 @@ def test_parse_string_with_duration_optional_units(data_set):
config = ConfigFactory.parse_string(data_set[0])

assert config['a'] == data_set[1]
except Exception:
except ImportError:
pass
Loading

0 comments on commit fe5b64b

Please sign in to comment.