Skip to content

Commit db97219

Browse files
committed
poc for secret string obfuscation in validate_dict
1 parent b732eb2 commit db97219

File tree

5 files changed

+76
-7
lines changed

5 files changed

+76
-7
lines changed

dlt/common/exceptions.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import Any, AnyStr, Dict, List, Sequence, Optional, Iterable, Type, TypedDict
22

3+
from dlt.common.typing import StrAny
4+
35

46
class ExceptionTrace(TypedDict, total=False):
57
"""Exception trace. NOTE: we intend to change it with an extended line by line trace with code snippets"""
@@ -97,18 +99,22 @@ class DictValidationException(DltException):
9799
def __init__(
    self,
    msg: str,
    doc: StrAny,
    path: str,
    expected_type: Optional[Type[Any]] = None,
    field: Optional[str] = None,
    value: Any = None,
    nested_exceptions: Optional[List["DictValidationException"]] = None,
) -> None:
    """Create a validation error whose message has the document's string values masked.

    Args:
        msg: Human readable description of the validation failure.
        doc: The document being validated; every string value found in it is
            obfuscated wherever it appears in `msg`.
        path: Location of the validated document, stored as given.
        expected_type: Type the offending field was expected to have.
        field: Name of the offending field.
        value: Offending value, stored as given (not obfuscated).
        nested_exceptions: Validation errors collected from alternative types.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # between the exceptions and utils modules - confirm.
    from dlt.common.utils import obfuscate_values_in_string

    self.doc = doc
    self.path = path
    self.expected_type = expected_type
    self.field = field
    self.value = value
    self.nested_exceptions = nested_exceptions
    self.msg = obfuscate_values_in_string(doc, msg)
    # Pass the obfuscated message to the base class: handing over the raw `msg`
    # would keep the secret values readable through `args` and any default
    # exception rendering.
    super().__init__(self.msg)
113119

114120
def __str__(self) -> str:

dlt/common/schema/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
243243
if not isinstance(pv, str):
244244
raise DictValidationException(
245245
f"field {pk} value {pv} has invalid type {type(pv).__name__} while str is expected",
246+
{},
246247
path,
247248
t,
248249
pk,
@@ -255,6 +256,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
255256
except Exception as e:
256257
raise DictValidationException(
257258
f"field {pk} value {pv[3:]} does not compile as regex: {str(e)}",
259+
{},
258260
path,
259261
t,
260262
pk,
@@ -264,6 +266,7 @@ def simple_regex_validator(path: str, pk: str, pv: Any, t: Any) -> bool:
264266
if RE_NON_ALPHANUMERIC_UNDERSCORE.match(pv):
265267
raise DictValidationException(
266268
f"field {pk} value {pv} looks like a regex, please prefix with re:",
269+
{},
267270
path,
268271
t,
269272
pk,
@@ -283,6 +286,7 @@ def validator(path: str, pk: str, pv: Any, t: Any) -> bool:
283286
raise DictValidationException(
284287
f"field {pk} value {pv} has invalid type {type(pv).__name__} while"
285288
" str is expected",
289+
{},
286290
path,
287291
t,
288292
pk,
@@ -291,11 +295,11 @@ def validator(path: str, pk: str, pv: Any, t: Any) -> bool:
291295
try:
292296
if naming.normalize_path(pv) != pv:
293297
raise DictValidationException(
294-
f"field {pk}: {pv} is not a valid column name", path, t, pk, pv
298+
f"field {pk}: {pv} is not a valid column name", {}, path, t, pk, pv
295299
)
296300
except ValueError:
297301
raise DictValidationException(
298-
f"field {pk}: {pv} is not a valid column name", path, t, pk, pv
302+
f"field {pk}: {pv} is not a valid column name", {}, path, t, pk, pv
299303
)
300304
return True
301305
else:

dlt/common/utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,3 +635,42 @@ def __getattribute__(self, name: str) -> Any:
635635
raise RuntimeError("This instance has been dropped and cannot be used anymore.")
636636

637637
return DefunctClass
638+
639+
640+
def obfuscate_string(s: str) -> str:
    """Mask a string with asterisks.

    Strings shorter than six characters are masked completely. Longer strings
    keep their first and last character and have everything in between masked,
    so the result always has the same length as the input.
    """
    length = len(s)
    if length >= 6:
        masked_middle = "*" * (length - 2)
        return f"{s[0]}{masked_middle}{s[-1]}"
    return "*" * length
645+
646+
647+
def all_strings(d: Any) -> List[str]:
    """Return all string values found in the given object.

    The object is walked recursively: a string yields itself, a dict contributes
    the strings found in its values (keys are ignored) and a list contributes the
    strings found in its items. Any other value contributes nothing.

    Args:
        d: A string, dict, list or any other value to search.

    Returns:
        The strings in traversal order; duplicates are preserved.
    """
    if isinstance(d, str):
        return [d]
    if isinstance(d, dict):
        children = d.values()
    elif isinstance(d, list):
        children = d
    else:
        return []

    strings: List[str] = []
    for child in children:
        # Recurse for every child so that lists nested inside dicts (and the
        # other way round) are searched too, not only str and dict values.
        strings.extend(all_strings(child))
    return strings
665+
666+
667+
def obfuscate_values_in_string(d: StrAny, msg: str) -> str:
    """Obfuscate, inside `msg`, every string value found in `d` and its nested containers.

    Args:
        d: Document whose string values must not appear verbatim in the message.
        msg: Message that may contain those values.

    Returns:
        `msg` with each occurrence of a collected string replaced by its
        obfuscated form.
    """
    import re

    # Drop duplicates (keeping order) and empty strings: an empty pattern would
    # match at every position and has nothing to hide anyway.
    secrets = [s for s in dict.fromkeys(all_strings(d)) if s]
    if not secrets:
        return msg

    # Replace in a single pass with the longest candidates first. Replacing the
    # values one after another lets a short value that is a substring of a longer
    # one break the longer match and leave the rest of it readable.
    secrets.sort(key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(s) for s in secrets))
    return pattern.sub(lambda match: obfuscate_string(match.group(0)), msg)

dlt/common/validation.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,11 @@ def validate_dict(
6565
# check missing props
6666
missing = set(required_props.keys()).difference(props.keys())
6767
if len(missing):
68-
raise DictValidationException(f"following required fields are missing {missing}", path)
68+
raise DictValidationException(f"following required fields are missing {missing}", doc, path)
6969
# check unknown props
7070
unexpected = set(props.keys()).difference(allowed_props.keys())
7171
if len(unexpected):
72-
raise DictValidationException(f"following fields are unexpected {unexpected}", path)
72+
raise DictValidationException(f"following fields are unexpected {unexpected}", doc, path)
7373

7474
def verify_prop(pk: str, pv: Any, t: Any) -> None:
7575
# covers none in optional and union types
@@ -108,6 +108,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
108108
msg += f"For {get_type_name(failed.expected_type)}: " + str(failed) + "\n"
109109
raise DictValidationException(
110110
msg,
111+
doc,
111112
path,
112113
t,
113114
pk,
@@ -118,13 +119,14 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
118119
a_l = get_literal_args(t)
119120
if pv not in a_l:
120121
raise DictValidationException(
121-
f"field '{pk}' with value {pv} is not one of: {a_l}", path, t, pk, pv
122+
f"field '{pk}' with value {pv} is not one of: {a_l}", doc, path, t, pk, pv
122123
)
123124
elif t in [int, bool, str, float]:
124125
if not isinstance(pv, t):
125126
raise DictValidationException(
126127
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
127128
f" '{t.__name__}' is expected",
129+
doc,
128130
path,
129131
t,
130132
pk,
@@ -135,6 +137,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
135137
raise DictValidationException(
136138
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
137139
f" '{get_type_name(t)}' is expected",
140+
doc,
138141
path,
139142
t,
140143
pk,
@@ -146,6 +149,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
146149
raise DictValidationException(
147150
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
148151
" 'list' is expected",
152+
doc,
149153
path,
150154
t,
151155
pk,
@@ -160,6 +164,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
160164
raise DictValidationException(
161165
f"field '{pk}' with value {pv} has invalid type '{type(pv).__name__}' while"
162166
" 'dict' is expected",
167+
doc,
163168
path,
164169
t,
165170
pk,
@@ -170,7 +175,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
170175
for d_k, d_v in pv.items():
171176
if not isinstance(d_k, str):
172177
raise DictValidationException(
173-
f"field '{pk}' with key {d_k} must be a string", path, t, pk, d_k
178+
f"field '{pk}' with key {d_k} must be a string", doc, path, t, pk, d_k
174179
)
175180
verify_prop(f"{pk}[{d_k}]", d_v, d_v_t)
176181
elif t is Any:
@@ -188,6 +193,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
188193
raise DictValidationException(
189194
f"field '{pk}' expects callable (function or class instance) but got "
190195
f" '{pv}'. Mind that signatures are not validated",
196+
doc,
191197
path,
192198
t,
193199
pk,
@@ -203,6 +209,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
203209
raise DictValidationException(
204210
f"field '{pk}' expects class '{type_name}' but got instance of"
205211
f" '{pv_type_name}'",
212+
doc,
206213
path,
207214
t,
208215
pk,
@@ -212,6 +219,7 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None:
212219
type_name = get_type_name(t)
213220
raise DictValidationException(
214221
f"field '{pk}' has expected type '{type_name}' which lacks validator",
222+
doc,
215223
path,
216224
t,
217225
pk,

tests/common/test_validation.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Optional,
1616
Union,
1717
)
18+
from dlt.common.configuration.specs import BaseConfiguration
1819

1920
from dlt.common import Decimal, jsonpath
2021
from dlt.common.exceptions import DictValidationException
@@ -406,6 +407,17 @@ class TTestRecordClassUnion(TypedDict):
406407
validate_dict(TTestRecordClassUnion, test_item_2, path=".")
407408

408409

410+
def test_secrets_obfuscation() -> None:
    """String values of the validated document must be masked in the error message."""

    class Config(TypedDict):
        a: str
        b: int

    # "b" holds a dict instead of an int, so validation fails and the message
    # mentions the offending value.
    invalid_doc = {"a": "123456", "b": {"c": "inner_value"}}
    with pytest.raises(DictValidationException) as exc_info:
        validate_dict(Config, invalid_doc, ".")

    error_message = exc_info.value.msg
    assert "inner_value" not in error_message
    assert "{'c': 'i*********e'}" in error_message
419+
420+
409421
# def test_union_merge() -> None:
410422
# """Overriding fields is simply illegal in TypedDict"""
411423
# class EndpointResource(TypedDict, total=False):

0 commit comments

Comments
 (0)