Coverage for src/configuraptor/core.py: 100%
225 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-11-20 11:53 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2023-11-20 11:53 +0100
1"""
2Contains most of the loading logic.
3"""
5import dataclasses as dc
6import io
7import os
8import typing
9import warnings
10from pathlib import Path
11from typing import Any, Type
13import requests
15from . import loaders
16from .abs import C, T, T_data, Type_C
17from .alias import Alias
18from .binary_config import BinaryConfig
19from .errors import (
20 ConfigErrorCouldNotConvert,
21 ConfigErrorInvalidType,
22 ConfigErrorMissingKey,
23)
24from .helpers import (
25 all_annotations,
26 camel_to_snake,
27 check_type,
28 dataclass_field,
29 find_pyproject_toml,
30 is_custom_class,
31 is_optional,
32 is_parameterized,
33)
34from .postpone import Postponed
35from .type_converters import CONVERTERS
38def _data_for_nested_key(key: str, raw: dict[str, typing.Any]) -> dict[str, typing.Any]:
39 """
40 If a key contains a dot, traverse the raw dict until the right key was found.
42 Example:
43 key = some.nested.key
44 raw = {"some": {"nested": {"key": {"with": "data"}}}}
45 -> {"with": "data"}
46 """
47 parts = key.split(".")
48 while parts:
49 key = parts.pop(0)
50 if key not in raw:
51 return {}
53 raw = raw[key]
55 return raw
def _guess_key(clsname: str) -> str:
    """
    Derive the default key for a class name by passing it through `camel_to_snake`.

    This is the fallback used when `load_into` is called without an explicit key.
    """
    default_key = camel_to_snake(clsname)
    return default_key
66def _from_mock_url(url: str) -> str:
67 """
68 Pytest only: when starting a url with mock:// it is expected to just be json afterwards.
69 """
70 return url.removeprefix("mock://")
def guess_filetype_for_url(url: str, response: typing.Optional["requests.Response"] = None) -> str:
    """
    Based on the url (which may have an extension) and the requests response \
    (which may have a content-type), try to guess the right filetype (-> loader, e.g. json or yaml).

    Args:
        url: the location the data came from; anything after the first '?' is ignored.
        response: optional response object; only its `headers` mapping is read.

    Returns:
        The lowercased extension of the url (without dot) if it has one, \
        otherwise the lowercased subtype of the content-type header (unless that is empty or 'plain'), \
        otherwise "json".
    """
    url = url.split("?")[0]
    if url_extension := os.path.splitext(url)[1].lower():
        return url_extension.strip(".")

    # Compare against None explicitly: a requests.Response is falsy for 4xx/5xx status codes,
    # so a plain truthiness check would silently throw away its content-type header.
    if response is not None:
        # "Application/JSON; charset=utf-8" -> "application/json" -> "json"
        content_type_header = response.headers.get("content-type", "").split(";")[0].strip().lower()
        content_type = content_type_header.split("/")[-1]
        if content_type and content_type != "plain":
            return content_type

    # If both methods fail, default to JSON
    return "json"
def from_url(url: str, _dummy: bool = False) -> tuple[io.BytesIO, str]:
    """
    Fetch the contents behind `url` and hand them back as an in-memory binary file plus a guessed file type.

    The result can be fed to a loader:
    > loader = loaders.get(filetype)
    > # dev/null exists but always returns b''
    > data = loader(contents, Path("/dev/null"))

    Args:
        url: `mock://` urls carry their contents inline (nothing is requested); \
            anything else is fetched with `requests.get` using a 10 second timeout.
        _dummy: skip the request and pretend the contents are an empty json object.

    Returns:
        A tuple of (the encoded contents wrapped in `io.BytesIO`, the file type from `guess_filetype_for_url`).
    """
    response = None
    if url.startswith("mock://"):
        text = _from_mock_url(url)
    elif _dummy:
        text = "{}"
    else:
        response = requests.get(url, timeout=10)
        text = response.text

    # `response` stays None for the mock and dummy variants, so only the url is used for guessing there.
    filetype = guess_filetype_for_url(url, response)
    buffer = io.BytesIO(text.encode())
    return buffer, filetype
def _load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Resolve `data` (bytes, list of sources, url, filename/path or dict) to the section that should be loaded.

    The section is selected by a manual key or a key guessed from the class name,
    e.g. class Tool will be mapped to key tool.
    Nested keys are supported as well (tool.extra -> {"tool": {"extra": ...}}).

    Args:
        data: bytes are returned untouched; a list is loaded source by source and merged \
            (later sources win); a str is either a http(s)/mock url or a file path; \
            a Path is opened and parsed by the loader for its suffix (or name if it has no suffix).
        key: (dotted) key of the section to select; when None it is guessed.
        classname: used to guess the key when `key` is None and the data has more than one top-level key.
        lower_keys: lowercase the keys of the selected section.
        allow_types: the types the selected section may have.

    Returns:
        The selected section, or an empty dict when the source contained no data at all.

    Raises:
        ValueError: for an empty list, when the selected section is empty, \
            or when it is not an instance of `allow_types`.
    """
    if isinstance(data, bytes):
        # bytes in -> bytes out, untouched.
        # `T_data` is re-used for the return value, which is hard to express for mypy.
        return data  # type: ignore

    if isinstance(data, list):
        if not data:
            raise ValueError("Empty list passed!")

        merged: dict[str, typing.Any] = {}
        for source in data:
            # every source is resolved separately (keys are always lowercased here) and merged in order
            merged |= load_data(source, key=key, classname=classname, lower_keys=True, allow_types=allow_types)

        return merged

    if isinstance(data, str):
        is_remote = data.startswith(("http://", "https://", "mock://"))
        if is_remote:
            contents, filetype = from_url(data)

            parse = loaders.get(filetype)
            # dev/null exists but always returns b''
            data = parse(contents, Path("/dev/null"))
        else:
            data = Path(data)

    if isinstance(data, Path):
        with data.open("rb") as handle:
            # files without a suffix (data.suffix == "") are looked up by their full name
            parse = loaders.get(data.suffix or data.name)
            data = parse(handle, data.resolve())

    if not data:
        return {}

    if key is None:
        # no explicit key: use the only top-level key if there is just one, else derive it from the class name
        if len(data) == 1:
            key = next(iter(data.keys()))
        elif classname is not None:
            key = _guess_key(classname)

    if key:
        data = _data_for_nested_key(key, data)

    if not data:
        raise ValueError("No data found!")

    if not isinstance(data, allow_types):
        raise ValueError(f"Data should be one of {allow_types} but it is {type(data)}!")

    if lower_keys and isinstance(data, dict):
        data = {name.lower(): value for name, value in data.items()}

    return typing.cast(dict[str, typing.Any], data)
def load_data(
    data: T_data,
    key: str = None,
    classname: str = None,
    lower_keys: bool = False,
    allow_types: tuple[type, ...] = (dict,),
) -> dict[str, typing.Any]:
    """
    Wrapper around `_load_data` that retries once with key="" if anything goes wrong.

    When `data` is None, the result of `find_pyproject_toml()` is used as the source.
    If the failing attempt already used key="", a warning is emitted and an empty dict is returned.
    """
    if data is None:
        # no source given: fall back to pyproject.toml
        data = find_pyproject_toml()

    try:
        return _load_data(data, key, classname, lower_keys=lower_keys, allow_types=allow_types)
    except Exception as e:
        # sourcery skip: remove-unnecessary-else, simplify-empty-collection-comparison, swap-if-else-branches
        # @sourcery: `key == ""` is NOT the same as `not key` (None must still trigger the retry)
        if key == "":  # pragma: no cover
            warnings.warn(f"Data could not be loaded: {e}", source=e)
            # key already was "", so there is nothing left to retry
            # (will probably not happen but fallback)
            return {}

        return _load_data(data, "", classname, lower_keys=lower_keys, allow_types=allow_types)
F = typing.TypeVar("F")


def convert_between(from_value: F, from_type: Type[F], to_type: Type[T]) -> T:
    """
    Convert `from_value` from one type to another.

    A converter registered in `CONVERTERS` under the (from_type, to_type) pair is preferred;
    when there is none, `to_type` itself is called with the value.
    """
    converter = CONVERTERS.get((from_type, to_type))
    if not converter:
        # no dedicated converter: just call the target type
        return to_type(from_value)  # type: ignore

    return typing.cast(T, converter(from_value))
def check_and_convert_type(value: Any, _type: Type[T], convert_types: bool, key: str = "variable") -> T:
    """
    Return `value` if it matches `_type`, otherwise try to make it fit (if allowed) or raise.

    Args:
        value (Any): The value to be checked and potentially converted.
        _type (Type[T]): The expected type for the value.
        convert_types (bool): If True, a value of the wrong type is converted with `convert_between`.
        key (str, optional): The name or key associated with the variable (used in error messages).
            Defaults to "variable".

    Returns:
        T: The value as is when the type matches, None for an unresolved alias on an optional type,
            or the converted value.

    Raises:
        ConfigErrorInvalidType: If the type does not match and conversion is not allowed,
            or if the value is an unresolved alias for a non-optional type.
        ConfigErrorCouldNotConvert: If conversion fails with a TypeError or ValueError.
    """
    if check_type(value, _type):
        # nothing to do, the type already matches
        return value

    if isinstance(value, Alias):
        if not is_optional(_type):
            # unresolved alias, error should've already been thrown for parent but lets do it again:
            raise ConfigErrorInvalidType(value.to, value=value, expected_type=_type)

        return typing.cast(T, None)

    if not convert_types:
        # mismatch and conversion is switched off
        raise ConfigErrorInvalidType(key, value=value, expected_type=_type)

    # mismatch, but conversion is allowed: give it a try
    actual_type = type(value)
    try:
        return convert_between(value, actual_type, _type)
    except (TypeError, ValueError) as e:
        raise ConfigErrorCouldNotConvert(type(value), _type, value) from e
def ensure_types(
    data: dict[str, T], annotations: dict[str, type[T]], convert_types: bool = False
) -> dict[str, T | None]:
    """
    Make sure all values in 'data' are in line with the ones stored in 'annotations'.

    Every annotated key is run through `check_and_convert_type`, except:
    - keys missing from `data`, which are skipped with a warning;
    - `Postponed` values, which are left out of the result;
    - aliases pointing at a `Postponed` value, which are left out as well.
    An alias whose target key is present in `data` is checked using the target's value.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    # unique marker instead of None, since None can be a legitimate (typing.Optional) value.
    # cast to T to make mypy happy
    notfound = typing.cast(T, object())

    final: dict[str, T | None] = {}
    for key, _type in annotations.items():
        compare = data.get(key, notfound)
        if compare is notfound:  # pragma: nocover
            warnings.warn(
                "This should not happen since `load_recursive` already fills `data` based on `annotations`"
            )
            # skip!
            continue

        if isinstance(compare, Postponed):
            # postponed items are not touched at all
            continue

        if isinstance(compare, Alias):
            related_data = data.get(compare.to, notfound)
            if isinstance(related_data, Postponed):
                # an alias of a postponed item is postponed too
                continue

            if related_data is not notfound:
                # the original key is set, so the alias takes over its value
                compare = related_data

        final[key] = check_and_convert_type(compare, _type, convert_types, key)

    return final
def convert_config(items: dict[str, T]) -> dict[str, T]:
    """
    Normalise the config dict (from toml) or 'overwrites' dict.

    1. items whose value is None are dropped, since in that case the default should be used;
    2. '-' and '.' in keys are replaced with '_' so the keys can be mapped to the Config properties.
    """
    converted: dict[str, T] = {}
    for name, value in items.items():
        if value is None:
            continue

        python_name = name.replace("-", "_").replace(".", "_")
        converted[python_name] = value

    return converted
AnyType: typing.TypeAlias = typing.Type[typing.Any]
T_Type = typing.TypeVar("T_Type", bound=AnyType)


def has_aliases(cls: AnyType, key: str) -> typing.Generator[str, None, None]:
    """
    Yield the name of every attribute in `cls.__dict__` that is an `Alias` pointing to 'key'.
    """
    yield from (
        field for field, value in cls.__dict__.items() if isinstance(value, Alias) and value.to == key
    )
def has_alias(cls: AnyType, key: str, data: dict[str, T]) -> typing.Optional[T]:
    """
    Look up a value for `key` via any alias in the same config class that references it.

    Example:
        class Config:
            key1: str
            key2: str = alias('key1')

        load_into(Config, {'key2': 'something'})
        # -> key1 will look up the value of key2 because it's configured as an alias for it.

    If multiple aliases point to the same base, they are tried in order and the first truthy value wins.
    Returns None when no alias has a truthy value in `data`.
    """
    for field in has_aliases(cls, key):
        if value := data.get(field):
            return value

    return None
def load_recursive(
    cls: AnyType, data: dict[str, T], annotations: dict[str, AnyType], convert_types: bool = False
) -> dict[str, T]:
    """
    For all annotations (recursively gathered from parents with `all_annotations`), \
    try to resolve the tree of annotations.

    Uses `load_into_recurse`, not itself directly.

    Example:
        class First:
            key: str

        class Second:
            other: First

        # step 1
        cls = Second
        data = {"second": {"other": {"key": "anything"}}}
        annotations: {"other": First}

        # step 1.5
        data = {"other": {"key": "anything"}
        annotations: {"other": First}

        # step 2
        cls = First
        data = {"key": "anything"}
        annotations: {"key": str}

    Args:
        cls: the config class the annotations belong to (used to look up defaults and aliases).
        data: the raw values for this class, by (python compatible) key.
        annotations: the annotated type per key.
        convert_types: passed on to `_load_into_recurse` for nested custom classes.

    Returns:
        A dict with one entry per annotation. Keys present in `data` have their custom class values
        (directly, or inside a list, dict or typing.Union) loaded into instances. Missing keys are
        filled, in this order of preference, from the class default, None for optional types,
        the dataclass default factory, or an alias.

    Raises:
        ConfigErrorMissingKey: if a key is missing from `data` and none of the fallbacks applies.

    TODO: python 3.11 exception groups to throw multiple errors at once!
    """
    updated = {}

    for _key, _type in annotations.items():
        if _key in data:
            value: typing.Any = data[_key]  # value can change so define it as any instead of T
            if is_parameterized(_type):
                origin = typing.get_origin(_type)
                arguments = typing.get_args(_type)
                if origin is list and arguments and is_custom_class(arguments[0]):
                    # e.g. list[Point]
                    subtype = arguments[0]
                    value = [_load_into_recurse(subtype, subvalue, convert_types=convert_types) for subvalue in value]

                elif origin is dict and arguments and is_custom_class(arguments[1]):
                    # e.g. dict[str, Point]
                    subkeytype, subvaluetype = arguments
                    # subkey(type) is not a custom class, so don't try to convert it:
                    value = {
                        subkey: _load_into_recurse(subvaluetype, subvalue, convert_types=convert_types)
                        for subkey, subvalue in value.items()
                    }
                # elif origin is dict:
                # keep data the same
                elif origin is typing.Union and arguments:
                    # NOTE(review): `typing.get_origin` reports `types.UnionType` for PEP 604 unions (`A | B`),
                    # so those do not reach this branch - confirm whether that is intended.
                    for arg in arguments:
                        if is_custom_class(arg):
                            value = _load_into_recurse(arg, value, convert_types=convert_types)
                            # `value` is an instance now; handing it to another custom class
                            # of the same Union as if it were raw data would fail, so stop here.
                            break

                # todo: other parameterized/unions/typing.Optional

            elif is_custom_class(_type):
                # type must be C (custom class) at this point
                value = _load_into_recurse(
                    # make mypy and pycharm happy by telling it _type is of type C...
                    # actually just passing _type as first arg!
                    typing.cast(Type_C[typing.Any], _type),
                    value,
                    convert_types=convert_types,
                )

        elif _key in cls.__dict__:
            # property has default, use that instead.
            value = cls.__dict__[_key]
        elif is_optional(_type):
            # type is optional and not found in __dict__ -> default is None
            value = None
        elif dc.is_dataclass(cls) and (field := dataclass_field(cls, _key)) and field.default_factory is not dc.MISSING:
            # could have a default factory
            # todo: do something with field.default?
            value = field.default_factory()
        elif value := has_alias(cls, _key, data):
            # value updated by alias
            ...
        else:
            raise ConfigErrorMissingKey(_key, cls, _type)

        updated[_key] = value

    return updated
def check_and_convert_data(
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    _except: typing.Iterable[str],
    strict: bool = True,
    convert_types: bool = False,
) -> dict[str, typing.Any]:
    """
    Based on class annotations, this prepares the data for `load_into_recurse`.

    1. convert config-keys to python compatible config_keys (`convert_config`)
    2. loads custom class type annotations with the same logic (see also `load_recursive`)
    3. only when `strict`: ensures the annotated types match the actual types (`ensure_types`).

    `_except` is handed to `all_annotations` when collecting the annotations of `cls`.
    """
    annotations = all_annotations(cls, _except=_except)

    loaded = load_recursive(cls, convert_config(data), annotations, convert_types=convert_types)
    if not strict:
        # no type checks wanted: hand back the loaded data as is
        return loaded

    return ensure_types(loaded, annotations, convert_types=convert_types)
480T_init_list = list[typing.Any]
481T_init_dict = dict[str, typing.Any]
482T_init = tuple[T_init_list, T_init_dict] | T_init_list | T_init_dict | None
485@typing.no_type_check # (mypy doesn't understand 'match' fully yet)
486def _split_init(init: T_init) -> tuple[T_init_list, T_init_dict]:
487 """
488 Accept a tuple, a dict or a list of (arg, kwarg), {kwargs: ...}, [args] respectively and turn them all into a tuple.
489 """
490 if not init:
491 return [], {}
493 args: T_init_list = []
494 kwargs: T_init_dict = {}
495 match init:
496 case (args, kwargs):
497 return args, kwargs
498 case [*args]:
499 return args, {}
500 case {**kwargs}:
501 return [], kwargs
502 case _:
503 raise ValueError("Init must be either a tuple of list and dict, a list or a dict.")
def _load_into_recurse(
    cls: typing.Type[C],
    data: dict[str, typing.Any] | bytes,
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Create an instance of `cls` filled with `data`.

    Uses `load_recursive` (via `check_and_convert_data`) to load any fillable annotated properties \
    (see that method for an example).
    `init` can be used to optionally pass extra __init__ arguments. \
    NOTE: This will overwrite a config key with the same name!

    Three kinds of classes are handled:
    - `BinaryConfig` subclasses (or bytes input): delegated to `cls._parse_into(data)`;
    - dataclasses: constructed from the prepared data; `init` is not allowed;
    - anything else: constructed with the `init` arguments, after which the prepared data \
      is written into the instance's `__dict__`.

    Raises:
        NotImplementedError: when bytes input and `BinaryConfig` are not used together.
        ValueError: when `init` is passed for a dataclass.
    """
    init_args, init_kwargs = _split_init(init)

    if isinstance(data, bytes) or issubclass(cls, BinaryConfig):
        if not isinstance(data, (bytes, dict)):  # pragma: no cover
            raise NotImplementedError("BinaryConfig can only deal with `bytes` or a dict of bytes as input.")
        if not issubclass(cls, BinaryConfig):  # pragma: no cover
            raise NotImplementedError("Only BinaryConfig can be used with `bytes` (or a dict of bytes) as input.")

        return typing.cast(C, cls._parse_into(data))

    if dc.is_dataclass(cls):
        to_load = check_and_convert_data(cls, data, init_kwargs.keys(), strict=strict, convert_types=convert_types)
        if init:
            raise ValueError("Init is not allowed for dataclasses!")

        # ensure mypy inst is an instance of the cls type (and not a fictuous `DataclassInstance`)
        return typing.cast(C, cls(**to_load))

    # regular class: run __init__ first; whatever it already set on the instance is excluded from loading
    inst = cls(*init_args, **init_kwargs)
    to_load = check_and_convert_data(cls, data, inst.__dict__.keys(), strict=strict, convert_types=convert_types)
    inst.__dict__.update(**to_load)
    return inst
def _load_into_instance(
    inst: C,
    cls: typing.Type[C],
    data: dict[str, typing.Any],
    init: T_init = None,
    strict: bool = True,
    convert_types: bool = False,
) -> C:
    """
    Similar to `load_into_recurse` but fills an existing instance of a class (so after __init__) \
    and thus does not support init.

    Keys that are already present in the instance's `__dict__` are passed as `_except` \
    to `check_and_convert_data`.

    Raises:
        ValueError: if `init` is anything other than None.
    """
    if init is not None:
        raise ValueError("Can not init an existing instance!")

    already_set = inst.__dict__.keys()
    to_load = check_and_convert_data(cls, data, _except=already_set, strict=strict, convert_types=convert_types)

    inst.__dict__.update(**to_load)
    return inst
def load_into_class(
    cls: typing.Type[C],
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for load_data + _load_into_recurse.

    For `BinaryConfig` subclasses the loaded data may be bytes as well as a dict.
    """
    allow_types: tuple[type, ...]
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    to_load = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types)
    return _load_into_recurse(cls, to_load, init=init, strict=strict, convert_types=convert_types)
def load_into_instance(
    inst: C,
    data: T_data,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Shortcut for load_data + _load_into_instance.

    The class of `inst` decides the key guess and whether bytes are allowed (`BinaryConfig` only).
    """
    cls = inst.__class__

    allow_types: tuple[type, ...]
    if issubclass(cls, BinaryConfig):
        allow_types = (dict, bytes)
    else:
        allow_types = (dict,)

    to_load = load_data(data, key, cls.__name__, lower_keys=lower_keys, allow_types=allow_types)
    return _load_into_instance(inst, cls, to_load, init=init, strict=strict, convert_types=convert_types)
def load_into(
    cls: typing.Type[C],
    data: T_data = None,
    /,
    key: str = None,
    init: T_init = None,
    strict: bool = True,
    lower_keys: bool = False,
    convert_types: bool = False,
) -> C:
    """
    Load your config into a class (instance).

    Supports both a class or an instance as first argument, but that's hard to explain to mypy, so officially only
    classes are supported, and if you want to `load_into` an instance, you should use `load_into_instance`.

    Args:
        cls: either a class or an existing instance of that class.
        data: can be a dictionary or a path to a file to load (as pathlib.Path or str)
        key: optional (nested) dictionary key to load data from (e.g. 'tool.su6.specific')
        init: optional data to pass to your cls' __init__ method (only if cls is not an instance already)
        strict: enable type checks or allow anything?
        lower_keys: should the config keys be lowercased? (for .env)
        convert_types: should the types be converted to the annotated type if not yet matching? (for .env)

    """
    options = {
        "key": key,
        "init": init,
        "strict": strict,
        "lower_keys": lower_keys,
        "convert_types": convert_types,
    }

    if isinstance(cls, type):
        return load_into_class(cls, data, **options)

    # would not be supported according to mypy, but you can still load_into(instance)
    return load_into_instance(cls, data, **options)