Skip to content

Commit dca4d3c

Browse files
authored
Significantly improve yaml load times when the C loader is available (home-assistant#73337)
1 parent b84e844 commit dca4d3c

File tree

8 files changed

+192
-80
lines changed

8 files changed

+192
-80
lines changed

.github/workflows/ci.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ env:
2828
PIP_CACHE: /tmp/pip-cache
2929
SQLALCHEMY_WARN_20: 1
3030
PYTHONASYNCIODEBUG: 1
31+
HASS_CI: 1
3132

3233
concurrency:
3334
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

.github/workflows/wheels.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ jobs:
158158
wheels-key: ${{ secrets.WHEELS_KEY }}
159159
wheels-user: wheels
160160
env-file: true
161-
apk: "build-base;cmake;git;linux-headers;libexecinfo-dev;bluez-dev;libffi-dev;openssl-dev;glib-dev;eudev-dev;libxml2-dev;libxslt-dev;libpng-dev;libjpeg-turbo-dev;tiff-dev;autoconf;automake;cups-dev;gmp-dev;mpfr-dev;mpc1-dev;ffmpeg-dev;gammu-dev;cargo"
161+
apk: "build-base;cmake;git;linux-headers;libexecinfo-dev;bluez-dev;libffi-dev;openssl-dev;glib-dev;eudev-dev;libxml2-dev;libxslt-dev;libpng-dev;libjpeg-turbo-dev;tiff-dev;autoconf;automake;cups-dev;gmp-dev;mpfr-dev;mpc1-dev;ffmpeg-dev;gammu-dev;yaml-dev;cargo"
162162
pip: "Cython;numpy;scikit-build"
163163
skip-binary: aiohttp,grpcio
164164
constraints: "homeassistant/package_constraints.txt"

Dockerfile.dev

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ RUN \
1818
libavfilter-dev \
1919
libpcap-dev \
2020
libturbojpeg0 \
21+
libyaml-dev \
2122
libxml2 \
2223
git \
2324
cmake \

homeassistant/scripts/check_config.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def mock_secrets(ldr, node):
191191

192192
if secrets:
193193
# Ensure !secrets point to the patched function
194-
yaml_loader.SafeLineLoader.add_constructor("!secret", yaml_loader.secret_yaml)
194+
yaml_loader.add_constructor("!secret", yaml_loader.secret_yaml)
195195

196196
def secrets_proxy(*args):
197197
secrets = Secrets(*args)
@@ -219,9 +219,7 @@ def secrets_proxy(*args):
219219
pat.stop()
220220
if secrets:
221221
# Ensure !secrets point to the original function
222-
yaml_loader.SafeLineLoader.add_constructor(
223-
"!secret", yaml_loader.secret_yaml
224-
)
222+
yaml_loader.add_constructor("!secret", yaml_loader.secret_yaml)
225223

226224
return res
227225

homeassistant/util/yaml/loader.py

+124-52
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,22 @@
44
from collections import OrderedDict
55
from collections.abc import Iterator
66
import fnmatch
7+
from io import StringIO
78
import logging
89
import os
910
from pathlib import Path
1011
from typing import Any, TextIO, TypeVar, Union, overload
1112

1213
import yaml
1314

15+
try:
16+
from yaml import CSafeLoader as FastestAvailableSafeLoader
17+
18+
HAS_C_LOADER = True
19+
except ImportError:
20+
HAS_C_LOADER = False
21+
from yaml import SafeLoader as FastestAvailableSafeLoader # type: ignore[misc]
22+
1423
from homeassistant.exceptions import HomeAssistantError
1524

1625
from .const import SECRET_YAML
@@ -88,6 +97,30 @@ def _load_secret_yaml(self, secret_dir: Path) -> dict[str, str]:
8897
return secrets
8998

9099

100+
class SafeLoader(FastestAvailableSafeLoader):
    """The fastest available safe loader.

    Subclasses whichever PyYAML safe loader was imported as
    ``FastestAvailableSafeLoader`` and additionally keeps the input
    stream, a display name for it, and the optional secrets store on the
    instance.
    """

    def __init__(self, stream: Any, secrets: Secrets | None = None) -> None:
        """Initialize the loader.

        ``stream`` is the YAML input: a ``str``, ``bytes`` or a file-like
        object. ``secrets`` is stored as ``self.secrets`` for constructors
        that need it.
        """
        # Stream and name are recorded here, before the base initializer
        # runs, so get_name()/get_stream_name() have something to report.
        # NOTE(review): presumably needed because the C-based loader does
        # not populate these attributes itself - confirm against PyYAML.
        self.stream = stream
        if isinstance(stream, str):
            self.name = "<unicode string>"
        elif isinstance(stream, bytes):
            self.name = "<byte string>"
        else:
            self.name = getattr(stream, "name", "<file>")
        super().__init__(stream)
        self.secrets = secrets

    def get_name(self) -> str:
        """Get the name of the loader."""
        return self.name

    def get_stream_name(self) -> str:
        """Get the name of the stream, or an empty string if it has none."""
        # str/bytes input and in-memory streams have no ``name`` attribute;
        # a plain attribute access would raise AttributeError for them.
        return getattr(self.stream, "name", None) or ""
122+
123+
91124
class SafeLineLoader(yaml.SafeLoader):
92125
"""Loader class that keeps track of line numbers."""
93126

@@ -103,6 +136,17 @@ def compose_node(self, parent: yaml.nodes.Node, index: int) -> yaml.nodes.Node:
103136
node.__line__ = last_line + 1 # type: ignore[attr-defined]
104137
return node
105138

139+
def get_name(self) -> str:
140+
"""Get the name of the loader."""
141+
return self.name
142+
143+
def get_stream_name(self) -> str:
144+
"""Get the name of the stream."""
145+
return self.stream.name or ""
146+
147+
148+
LoaderType = Union[SafeLineLoader, SafeLoader]
149+
106150

107151
def load_yaml(fname: str, secrets: Secrets | None = None) -> JSON_TYPE:
108152
"""Load a YAML file."""
@@ -114,60 +158,90 @@ def load_yaml(fname: str, secrets: Secrets | None = None) -> JSON_TYPE:
114158
raise HomeAssistantError(exc) from exc
115159

116160

117-
def parse_yaml(content: str | TextIO, secrets: Secrets | None = None) -> JSON_TYPE:
118-
"""Load a YAML file."""
161+
def parse_yaml(
    content: str | TextIO | StringIO, secrets: Secrets | None = None
) -> JSON_TYPE:
    """Parse YAML with the fastest available loader.

    When the C loader is not available the pure python loader is used
    directly. Otherwise the C-backed ``SafeLoader`` is tried first and, if
    it raises ``yaml.YAMLError``, the content is parsed again with the pure
    python loader so the resulting error carries line numbers.
    """
    if not HAS_C_LOADER:
        return _parse_yaml_pure_python(content, secrets)
    try:
        return _parse_yaml(SafeLoader, content, secrets)
    except yaml.YAMLError:
        # Loading failed, so we now load with the slow line loader
        # since the C one will not give us line numbers
        #
        # Rewind any stream so the second attempt starts at the beginning.
        # This is duck-typed on ``seek`` rather than isinstance-checked:
        # ``typing.TextIO`` is an annotation-only class, so real file
        # objects returned by ``open()`` are not instances of it and would
        # be left positioned wherever the first attempt stopped reading.
        # ``str`` and ``bytes`` have no ``seek`` and are passed through.
        rewind = getattr(content, "seek", None)
        if rewind is not None:
            rewind(0, 0)
        return _parse_yaml_pure_python(content, secrets)
176+
177+
178+
def _parse_yaml_pure_python(
    content: str | TextIO | StringIO, secrets: Secrets | None = None
) -> JSON_TYPE:
    """Parse YAML with the pure python line-tracking loader (very slow).

    A ``yaml.YAMLError`` raised while parsing is logged and re-raised as
    ``HomeAssistantError`` chained to the original exception.
    """
    try:
        parsed = _parse_yaml(SafeLineLoader, content, secrets)
    except yaml.YAMLError as err:
        _LOGGER.error(str(err))
        raise HomeAssistantError(err) from err
    return parsed
129187

130188

189+
def _parse_yaml(
    loader: type[SafeLoader] | type[SafeLineLoader],
    content: str | TextIO,
    secrets: Secrets | None = None,
) -> JSON_TYPE:
    """Parse ``content`` with the given loader class.

    The loader is instantiated with the stream and ``secrets``. A falsy
    parse result (an empty document yields ``None``) is replaced by an
    empty ``OrderedDict``.
    """

    def _make_loader(stream: Any) -> Any:
        # yaml.load calls Loader(stream); bind the secrets store as well.
        return loader(stream, secrets)

    parsed = yaml.load(content, Loader=_make_loader)
    if parsed:
        return parsed
    return OrderedDict()
201+
202+
131203
@overload
def _add_reference(
    obj: list | NodeListClass,
    loader: LoaderType,
    node: yaml.nodes.Node,
) -> NodeListClass:
    ...


@overload
def _add_reference(
    obj: str | NodeStrClass,
    loader: LoaderType,
    node: yaml.nodes.Node,
) -> NodeStrClass:
    ...


@overload
def _add_reference(obj: _DictT, loader: LoaderType, node: yaml.nodes.Node) -> _DictT:
    ...


def _add_reference(obj, loader: LoaderType, node: yaml.nodes.Node):  # type: ignore[no-untyped-def]
    """Attach the originating file name and line number to a parsed value.

    Lists and strings are first wrapped in ``NodeListClass`` and
    ``NodeStrClass``. The loader's name is stored as ``__config_file__``
    and the node's start line as ``__line__``; the annotated object is
    returned.
    """
    annotated = obj
    if isinstance(annotated, list):
        annotated = NodeListClass(annotated)
    if isinstance(annotated, str):
        annotated = NodeStrClass(annotated)
    setattr(annotated, "__config_file__", loader.get_name())
    setattr(annotated, "__line__", node.start_mark.line)
    return annotated
161235

162236

163-
def _include_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
237+
def _include_yaml(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
164238
"""Load another YAML file and embeds it using the !include tag.
165239
166240
Example:
167241
device_tracker: !include device_tracker.yaml
168242
169243
"""
170-
fname = os.path.join(os.path.dirname(loader.name), node.value)
244+
fname = os.path.join(os.path.dirname(loader.get_name()), node.value)
171245
try:
172246
return _add_reference(load_yaml(fname, loader.secrets), loader, node)
173247
except FileNotFoundError as exc:
@@ -191,12 +265,10 @@ def _find_files(directory: str, pattern: str) -> Iterator[str]:
191265
yield filename
192266

193267

194-
def _include_dir_named_yaml(
195-
loader: SafeLineLoader, node: yaml.nodes.Node
196-
) -> OrderedDict:
268+
def _include_dir_named_yaml(loader: LoaderType, node: yaml.nodes.Node) -> OrderedDict:
197269
"""Load multiple files from directory as a dictionary."""
198270
mapping: OrderedDict = OrderedDict()
199-
loc = os.path.join(os.path.dirname(loader.name), node.value)
271+
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
200272
for fname in _find_files(loc, "*.yaml"):
201273
filename = os.path.splitext(os.path.basename(fname))[0]
202274
if os.path.basename(fname) == SECRET_YAML:
@@ -206,11 +278,11 @@ def _include_dir_named_yaml(
206278

207279

208280
def _include_dir_merge_named_yaml(
209-
loader: SafeLineLoader, node: yaml.nodes.Node
281+
loader: LoaderType, node: yaml.nodes.Node
210282
) -> OrderedDict:
211283
"""Load multiple files from directory as a merged dictionary."""
212284
mapping: OrderedDict = OrderedDict()
213-
loc = os.path.join(os.path.dirname(loader.name), node.value)
285+
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
214286
for fname in _find_files(loc, "*.yaml"):
215287
if os.path.basename(fname) == SECRET_YAML:
216288
continue
@@ -221,10 +293,10 @@ def _include_dir_merge_named_yaml(
221293

222294

223295
def _include_dir_list_yaml(
224-
loader: SafeLineLoader, node: yaml.nodes.Node
296+
loader: LoaderType, node: yaml.nodes.Node
225297
) -> list[JSON_TYPE]:
226298
"""Load multiple files from directory as a list."""
227-
loc = os.path.join(os.path.dirname(loader.name), node.value)
299+
loc = os.path.join(os.path.dirname(loader.get_name()), node.value)
228300
return [
229301
load_yaml(f, loader.secrets)
230302
for f in _find_files(loc, "*.yaml")
@@ -233,10 +305,10 @@ def _include_dir_list_yaml(
233305

234306

235307
def _include_dir_merge_list_yaml(
236-
loader: SafeLineLoader, node: yaml.nodes.Node
308+
loader: LoaderType, node: yaml.nodes.Node
237309
) -> JSON_TYPE:
238310
"""Load multiple files from directory as a merged list."""
239-
loc: str = os.path.join(os.path.dirname(loader.name), node.value)
311+
loc: str = os.path.join(os.path.dirname(loader.get_name()), node.value)
240312
merged_list: list[JSON_TYPE] = []
241313
for fname in _find_files(loc, "*.yaml"):
242314
if os.path.basename(fname) == SECRET_YAML:
@@ -247,7 +319,7 @@ def _include_dir_merge_list_yaml(
247319
return _add_reference(merged_list, loader, node)
248320

249321

250-
def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> OrderedDict:
322+
def _ordered_dict(loader: LoaderType, node: yaml.nodes.MappingNode) -> OrderedDict:
251323
"""Load YAML mappings into an ordered dictionary to preserve key order."""
252324
loader.flatten_mapping(node)
253325
nodes = loader.construct_pairs(node)
@@ -259,14 +331,14 @@ def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> Order
259331
try:
260332
hash(key)
261333
except TypeError as exc:
262-
fname = getattr(loader.stream, "name", "")
334+
fname = loader.get_stream_name()
263335
raise yaml.MarkedYAMLError(
264336
context=f'invalid key: "{key}"',
265337
context_mark=yaml.Mark(fname, 0, line, -1, None, None), # type: ignore[arg-type]
266338
) from exc
267339

268340
if key in seen:
269-
fname = getattr(loader.stream, "name", "")
341+
fname = loader.get_stream_name()
270342
_LOGGER.warning(
271343
'YAML file %s contains duplicate key "%s". Check lines %d and %d',
272344
fname,
@@ -279,13 +351,13 @@ def _ordered_dict(loader: SafeLineLoader, node: yaml.nodes.MappingNode) -> Order
279351
return _add_reference(OrderedDict(nodes), loader, node)
280352

281353

282-
def _construct_seq(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
354+
def _construct_seq(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
    """Construct a YAML sequence and tag it with file name and line number."""
    # construct_yaml_seq must produce exactly one item; unpack it.
    [sequence] = loader.construct_yaml_seq(node)
    return _add_reference(sequence, loader, node)
286358

287359

288-
def _env_var_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> str:
360+
def _env_var_yaml(loader: LoaderType, node: yaml.nodes.Node) -> str:
289361
"""Load environment variables and embed it into the configuration YAML."""
290362
args = node.value.split()
291363

@@ -298,27 +370,27 @@ def _env_var_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> str:
298370
raise HomeAssistantError(node.value)
299371

300372

301-
def secret_yaml(loader: SafeLineLoader, node: yaml.nodes.Node) -> JSON_TYPE:
373+
def secret_yaml(loader: LoaderType, node: yaml.nodes.Node) -> JSON_TYPE:
    """Resolve a secret reference and embed its value in the configuration.

    The lookup is delegated to the loader's secrets store, keyed by the
    loader's name and the node value. Raises ``HomeAssistantError`` when
    the loader was created without a secrets store.
    """
    secrets = loader.secrets
    if secrets is None:
        raise HomeAssistantError("Secrets not supported in this YAML file")

    return secrets.get(loader.get_name(), node.value)
379+
380+
381+
def add_constructor(tag: Any, constructor: Any) -> None:
    """Add a constructor for ``tag`` to all loaders."""
    # Register on the fast loader first, then on the line-tracking one.
    SafeLoader.add_constructor(tag, constructor)
    SafeLineLoader.add_constructor(tag, constructor)
385+
386+
387+
# Register the custom tags and the default mapping/sequence constructors
# through add_constructor, in the same order as before.
for _tag, _constructor in (
    ("!include", _include_yaml),
    (yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _ordered_dict),
    (yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, _construct_seq),
    ("!env_var", _env_var_yaml),
    ("!secret", secret_yaml),
    ("!include_dir_list", _include_dir_list_yaml),
    ("!include_dir_merge_list", _include_dir_merge_list_yaml),
    ("!include_dir_named", _include_dir_named_yaml),
    ("!include_dir_merge_named", _include_dir_merge_named_yaml),
    ("!input", Input.from_node),
):
    add_constructor(_tag, _constructor)
# Do not leave the loop variables behind as module attributes.
del _tag, _constructor
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
*!* NOT YAML
1+
-*!*- NOT YAML
22

0 commit comments

Comments
 (0)