Coverage for phml\utilities\transform\sanitize\clean.py: 100%
58 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-06 15:05 -0500
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-06 15:05 -0500
1from re import match
3from phml.nodes import Element, Parent
5from .schema import Schema
def sanatize(tree: Parent, schema: Schema = Schema()):
    """Sanitize elements and attributes in the phml tree.

    Should be used when working with data from an unknown source. It should be
    run on an AST that has already been compiled to html so that no unknown
    values are left unchecked.

    By default the sanitization schema uses the github schema and follows the
    hast sanitize utility.

    * [github schema](https://github.com/syntax-tree/hast-util-sanitize/blob/main/lib/schema.js)
    * [hast sanatize](https://github.com/syntax-tree/hast-util-sanitize)

    The passes run in this order:

    1. remove elements whose tag is listed in ``schema.strip``,
    2. remove elements that do not match ``schema.tag_names``,
    3. remove elements listed in ``schema.ancestors`` whose direct parent is
       not one of the tags allowed for them,
    4. remove attributes that are not allowed, hold a disallowed value, or use
       a protocol that is not listed in ``schema.protocols``,
    5. add or overwrite the attributes listed in ``schema.required``.

    Note:
        This utility will edit the tree in place.

    Args:
        tree (Parent): The root of the tree that will be sanitized.
        schema (Schema, optional): User defined schema. Defaults to github schema.
    """

    from phml.utilities import (  # pylint: disable=import-outside-toplevel
        is_element,
        remove_nodes,
    )

    for strip in schema.strip:
        remove_nodes(tree, ["element", {"tag": strip}])

    def recurse_check_tag(node: Parent):
        """Remove every element that does not match ``schema.tag_names``."""
        # Iterate over a copy because children are removed while looping.
        for child in list(node):
            if isinstance(child, Element) and not is_element(child, schema.tag_names):
                node.remove(child)
            elif isinstance(child, Parent):
                recurse_check_tag(child)

    def recurse_check_ancestor(node: Parent):
        """Remove elements whose direct parent is not an allowed ancestor tag."""
        for child in list(node):
            if not isinstance(child, Element):
                continue

            if child.tag in schema.ancestors and (
                not isinstance(child.parent, Element)
                or child.parent.tag not in schema.ancestors[child.tag]
            ):
                node.remove(child)
            else:
                recurse_check_ancestor(child)

    def check_protocols(value: str, protocols: list[str]):
        """Return True when ``value`` starts with one of ``protocols`` plus ``:``.

        A plain prefix test is used instead of joining the protocols into a
        regular expression: in ``"http|https:.*"`` the ``:`` only binds to the
        last alternative, so any value that merely started with ``http`` was
        accepted, and protocol names were never escaped.
        """
        # NOTE(review): values without any protocol (relative urls, fragments)
        # are rejected here, exactly as before - confirm that this is intended.
        return value.startswith(tuple(f"{protocol}:" for protocol in protocols))

    def build_remove_attr_list(
        properties: dict,
        attributes: dict[str, tuple[str | bool, ...]],
        valid_attributes: list,
    ):
        """Build the list of attributes to remove from a dict of attributes.

        Args:
            properties (dict): The attributes currently set on the element.
            attributes (dict): Attribute name mapped to the values that the
                schema allows for it.
            valid_attributes (list): Every attribute name that the schema
                allows on the element.

        Returns:
            list: Names of the attributes that must be removed.
        """
        result = []
        for attribute, value in properties.items():
            if attribute not in valid_attributes:
                result.append(attribute)
                continue

            bad_protocol = (
                isinstance(value, str)
                and attribute in schema.protocols
                and not check_protocols(value, schema.protocols[attribute])
            )
            # `attributes[attribute]` is a tuple of allowed values, so this has
            # to be a membership test; comparing the value against the tuple
            # itself never matches and removed every restricted attribute.
            bad_value = attribute in attributes and value not in attributes[attribute]

            if bad_protocol or bad_value:
                result.append(attribute)
        return result

    def recurse_check_attributes(node: Parent):
        """Remove attributes that the schema does not allow on each element."""
        for child in node:
            if not isinstance(child, Element):
                continue

            # The "*" entry applies to every element. Elements without an entry
            # of their own must still be filtered against it, otherwise all of
            # their attributes would pass through unchecked.
            rules = list(schema.attributes.get(child.tag, [])) + list(
                schema.attributes.get("*", [])
            )
            allowed_values = {
                str(rule[0]): rule[1:] for rule in rules if isinstance(rule, tuple)
            }
            valid_names = [rule if isinstance(rule, str) else rule[0] for rule in rules]

            for attribute in build_remove_attr_list(
                child.attributes, allowed_values, valid_names
            ):
                child.pop(attribute, None)

            recurse_check_attributes(child)

    def recurse_check_required(node: Parent):
        """Set the attributes from ``schema.required`` on matching elements."""
        for child in node:
            if not isinstance(child, Element):
                continue

            if child.tag in schema.required:
                for attr, value in schema.required[child.tag].items():
                    if attr not in child.attributes:
                        child[attr] = value
                    elif isinstance(value, bool):
                        child[attr] = str(value).lower()
                    elif isinstance(value, str) and child[attr] != value:
                        child[attr] = value

            # Always descend; an element that has required attributes itself
            # can still contain elements that need them as well.
            recurse_check_required(child)

    def recurse_strip(node):
        """Remove every element that matches ``schema.strip``."""
        for child in list(node):
            if isinstance(child, Element) and is_element(child, schema.strip):
                node.remove(child)
            elif isinstance(child, Parent):
                recurse_strip(child)

    recurse_check_tag(tree)
    recurse_strip(tree)
    recurse_check_ancestor(tree)
    recurse_check_attributes(tree)
    recurse_check_required(tree)