phml.utilities.validate.validate

  1from re import match, split, sub
  2from typing import Any
  3
  4from phml.nodes import Element, Literal, Node, Parent
  5
# Names exported as this module's public API (used by `from ... import *`).
__all__ = [
    "validate",
    "generated",
    "is_heading",
    "is_css_link",
    "is_css_style",
    "is_javascript",
    "is_element",
    "is_embedded",
    "is_interactive",
    "is_phrasing",
    "is_event_handler",
    "blank",
]
 20
 21
 22def validate(node: Node) -> bool:
 23    """Validate a node based on attributes and type."""
 24
 25    if isinstance(node, Parent) and not all(isinstance(child, Node) for child in node):
 26        raise AssertionError("Children must be a node type")
 27
 28    if isinstance(node, Element):
 29        if not all(isinstance(node[prop], (bool, str)) for prop in node.attributes):
 30            raise AssertionError("Element 'attributes' must be of type 'bool' or 'str'")
 31
 32    if isinstance(node, Literal) and not isinstance(node.content, str):
 33        raise AssertionError("Literal 'content' must be of type 'str'")
 34
 35    return True
 36
 37
 38def generated(node: Node) -> bool:
 39    """Checks if a node has been generated. A node is concidered
 40    generated if it does not have a position.
 41
 42    Args:
 43        node (Node): Node to check for position with.
 44
 45    Returns:
 46        bool: Whether a node has a position or not.
 47    """
 48    return node.position is None
 49
 50
 51def is_heading(node: Element) -> bool:
 52    """Check if an element is a heading."""
 53
 54    if node.type == "element":
 55        if match(r"h[1-6]", node.tag) is not None:
 56            return True
 57        return False
 58    raise TypeError("Node must be an element.")
 59
 60
 61def is_css_link(node: Element) -> bool:
 62    """Check if an element is a `link` to a css file.
 63
 64    Returns `true` if `node` is a `<link>` element with a `rel` list that
 65    contains `'stylesheet'` and has no `type`, an empty `type`, or `'text/css'`
 66    as its `type`
 67    """
 68
 69    return (
 70        # Verify it is a element with a `link` tag
 71        is_element(node, "link")
 72        # Must have a rel list with stylesheet
 73        and "rel" in node
 74        and "stylesheet" in split(r" ", sub(r" +", " ", node["rel"]))
 75        and (
 76            # Can have a `type` of `text/css` or empty or no `type`
 77            "type" not in node
 78            or ("type" in node and (node["type"] in ["text/css", ""]))
 79        )
 80    )
 81
 82
 83def is_css_style(node: Element) -> bool:
 84    """Check if an element is a css `style` element.
 85
 86    Returns `true` if `node` is a `<style>` element that
 87    has no `type`, an empty `type`, or `'text/css'` as its `type`.
 88    """
 89
 90    return is_element(node, "style") and (
 91        "type" not in node or ("type" in node and (node["type"] in ["", "text/css"]))
 92    )
 93
 94
 95def is_javascript(node: Element) -> bool:
 96    """Check if an element is a javascript `script` element.
 97
 98    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
 99    `type` and a valid JavaScript `language`, or has neither.
100    """
101    return is_element(node, "script") and (
102        (
103            "type" in node
104            and node["type"] in ["text/ecmascript", "text/javascript"]
105            and "language" not in node
106        )
107        or (
108            "language" in node
109            and node["language"] in ["ecmascript", "javascript"]
110            and "type" not in node
111        )
112        or ("type" not in node and "language" not in node)
113    )
114
115
def is_element(node: Node, *conditions: str | list) -> bool:
    """Check whether a node is an element, optionally with a given tag.

    Each condition is either a `str`, matched against the element's tag, or
    a `list`, which matches when any of its items equals the tag. Conditions
    of any other type never match.

    Args:
        node (Node): The node to inspect.
        *conditions (str | list): Tags, or lists of tags, to match against.

    Returns:
        bool: `False` if `node` is not an `Element`. `True` if no conditions
            are given or at least one condition matches, otherwise `False`.
    """

    if not isinstance(node, Element):
        return False

    # Without conditions any element qualifies
    if len(conditions) == 0:
        return True

    for condition in conditions:
        if isinstance(condition, str):
            if node.tag == condition:
                return True
        elif isinstance(condition, list):
            if any(node.tag == nested for nested in condition):
                return True

    return False
139
140
def is_event_handler(attribute: str) -> bool:
    """Check whether an attribute name looks like an event handler.

    Args:
        attribute (str): The attribute name to check.

    Returns:
        bool: `True` if the name starts with `on` and its length is `5`
            or more.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5
146
147
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )

    return is_element(node, *embedded_tags)
182
183
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Args:
        node (Element): The element to inspect.

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return "href" in node

    if is_element(node, "audio", "video"):
        return "controls" in node

    if is_element(node, "img"):
        # `usemap` holds a map reference such as `#name`, so any value other
        # than an explicit `False` counts as present.
        return "usemap" in node and node["usemap"] is not False

    if is_element(node, "input"):
        # A missing `type` is not the Hidden state, so the input is interactive
        return "type" not in node or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )
217
218
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * text (true)
    * area (if its direct parent is a map element)
    * meta (if the itemprop attribute is present)
    * link (if the itemprop attribute is present, or if `rel` holds at least one keyword and
     every keyword is allowed in the body)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr (true)

    Args:
        node (Element): The node to inspect.

    Returns:
        True if the element is phrasing text
    """

    if Literal.is_text(node):
        return True

    if is_element(node, "area"):
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return "itemprop" in node

    if is_element(node, "link"):
        if "itemprop" in node:
            return True
        if "rel" not in node:
            return False

        body_ok = {
            "dns-prefetch", "modulepreload", "pingback", "preconnect",
            "prefetch", "preload", "prerender", "stylesheet",
        }

        # `split()` without an argument splits on any whitespace and drops
        # empty tokens.
        tokens = str(node["rel"]).lower().split()
        # An empty `rel` must not pass vacuously through `all()`.
        return bool(tokens) and all(token in body_ok for token in tokens)

    # NOTE(review): "node" is not an HTML tag and is absent from the list in
    # the docstring; kept so existing behavior is unchanged - confirm intent.
    phrasing_tags = (
        "node", "map", "mark", "math", "audio", "b", "bdi", "bdo", "br",
        "button", "canvas", "cite", "code", "data", "datalist", "del", "dfn",
        "em", "embed", "i", "iframe", "img", "input", "ins", "kbd", "label",
        "a", "abbr", "meter", "noscript", "object", "output", "picture",
        "progress", "q", "ruby", "s", "samp", "script", "select", "slot",
        "small", "span", "strong", "sub", "sup", "svg", "template",
        "textarea", "time", "u", "var", "video", "wbr",
    )

    return is_element(node, *phrasing_tags)
330
331
def blank(value: Any) -> bool:
    """Report whether a value is blank.

    `None` and any object without a `__len__` attribute are blank. A string
    is blank when nothing remains after stripping whitespace. Anything else
    is blank when its length is `0`.

    Args:
        value (Any): The value to check if it is blank.

    Returns:
        bool: True if value is blank
    """

    if value is None:
        return True

    # Objects that have no length are treated as blank
    if not hasattr(value, "__len__"):
        return True

    measured = value.strip() if isinstance(value, str) else value
    return len(measured) == 0
def validate(node: phml.nodes.Node) -> bool:
def validate(node: Node) -> bool:
    """Check that a node's children, attributes and content have valid types.

    Args:
        node (Node): The node to validate.

    Returns:
        bool: `True` when none of the checks fail.

    Raises:
        AssertionError: If a `Parent` yields a child that is not a `Node`,
            an `Element` has an attribute value that is neither `bool` nor
            `str`, or a `Literal` has `content` that is not a `str`.
    """

    if isinstance(node, Parent):
        for child in node:
            if not isinstance(child, Node):
                raise AssertionError("Children must be a node type")

    if isinstance(node, Element):
        for prop in node.attributes:
            if not isinstance(node[prop], (bool, str)):
                raise AssertionError(
                    "Element 'attributes' must be of type 'bool' or 'str'"
                )

    if isinstance(node, Literal):
        if not isinstance(node.content, str):
            raise AssertionError("Literal 'content' must be of type 'str'")

    return True

Validate a node based on attributes and type.

def generated(node: phml.nodes.Node) -> bool:
def generated(node: Node) -> bool:
    """Tell whether a node is considered generated.

    A node is considered generated when its `position` is `None`.

    Args:
        node (Node): Node whose position is inspected.

    Returns:
        bool: `True` if the node has no position, `False` otherwise.
    """
    has_position = node.position is not None
    return not has_position

Checks if a node has been generated. A node is considered generated if it does not have a position.

Args
  • node (Node): Node to check for position with.
Returns

bool: True if the node has no position (i.e. it is considered generated), False otherwise.

def is_heading(node: phml.nodes.Element) -> bool:
def is_heading(node: Element) -> bool:
    """Check if an element is a heading (`h1` through `h6`).

    Args:
        node (Element): The element to inspect.

    Returns:
        bool: `True` if the element's tag is exactly one of `h1`..`h6`.

    Raises:
        TypeError: If `node.type` is not `"element"`.
    """

    if node.type != "element":
        raise TypeError("Node must be an element.")

    # `match` only anchors at the start of the string, so `\Z` is needed to
    # reject tags that merely begin with a heading name (e.g. `h1-title`).
    return match(r"h[1-6]\Z", node.tag) is not None

Check if an element is a heading.

def is_css_style(node: phml.nodes.Element) -> bool:
def is_css_style(node: Element) -> bool:
    """Check if an element is a css `style` element.

    Args:
        node (Element): The element to inspect.

    Returns:
        bool: `True` if `node` is a `<style>` element that has no `type`,
            an empty `type`, or `'text/css'` as its `type`.
    """

    if not is_element(node, "style"):
        return False

    # A missing `type` is accepted
    if "type" not in node:
        return True

    return node["type"] in ["", "text/css"]

Check if an element is a css style element.

Returns true if node is a <style> element that has no type, an empty type, or 'text/css' as its type.

def is_javascript(node: phml.nodes.Element) -> bool:
 96def is_javascript(node: Element) -> bool:
 97    """Check if an element is a javascript `script` element.
 98
 99    Returns `true` if `node` is a `<script>` element that has a valid JavaScript `type`, has no
100    `type` and a valid JavaScript `language`, or has neither.
101    """
102    return is_element(node, "script") and (
103        (
104            "type" in node
105            and node["type"] in ["text/ecmascript", "text/javascript"]
106            and "language" not in node
107        )
108        or (
109            "language" in node
110            and node["language"] in ["ecmascript", "javascript"]
111            and "type" not in node
112        )
113        or ("type" not in node and "language" not in node)
114    )

Check if an element is a javascript script element.

Returns true if node is a <script> element that has a valid JavaScript type, has no type and a valid JavaScript language, or has neither.

def is_element(node: phml.nodes.Node, *conditions: str | list) -> bool:
def is_element(node: Node, *conditions: str | list) -> bool:
    """Check whether a node is an element, optionally with a given tag.

    Each condition is either a `str`, matched against the element's tag, or
    a `list`, which matches when any of its items equals the tag. Conditions
    of any other type never match.

    Args:
        node (Node): The node to inspect.
        *conditions (str | list): Tags, or lists of tags, to match against.

    Returns:
        bool: `False` if `node` is not an `Element`. `True` if no conditions
            are given or at least one condition matches, otherwise `False`.
    """

    if not isinstance(node, Element):
        return False

    # Without conditions any element qualifies
    if len(conditions) == 0:
        return True

    for condition in conditions:
        if isinstance(condition, str):
            if node.tag == condition:
                return True
        elif isinstance(condition, list):
            if any(node.tag == nested for nested in condition):
                return True

    return False

Checks if the given node is a certain element.

When providing a str it will check that the elements tag matches. If a list is provided it checks that one of the conditions in the list passes.

def is_embedded(node: phml.nodes.Element) -> bool:
def is_embedded(node: Element) -> bool:
    """Check to see if an element is an embedded element.

    Embedded Elements:

    * audio
    * canvas
    * embed
    * iframe
    * img
    * MathML math
    * object
    * picture
    * SVG svg
    * video

    Returns:
        True if embedded
    """
    embedded_tags = (
        "audio",
        "canvas",
        "embed",
        "iframe",
        "img",
        "math",
        "object",
        "picture",
        "svg",
        "video",
    )

    return is_element(node, *embedded_tags)

Check to see if an element is an embedded element.

Embedded Elements:

  • audio
  • canvas
  • embed
  • iframe
  • img
  • MathML math
  • object
  • picture
  • SVG svg
  • video
Returns

True if embedded

def is_interactive(node: phml.nodes.Element) -> bool:
def is_interactive(node: Element) -> bool:
    """Check if the element is intended for user interaction.

    Conditions:

    * a (if the href attribute is present)
    * audio, video (if the controls attribute is present)
    * img (if the usemap attribute is present)
    * input (if the type attribute is not in the Hidden state)
    * button, details, embed, iframe, label, select, textarea (always)

    Args:
        node (Element): The element to inspect.

    Returns:
        True if element is interactive
    """

    if is_element(node, "a"):
        return "href" in node

    if is_element(node, "audio", "video"):
        return "controls" in node

    if is_element(node, "img"):
        # `usemap` holds a map reference such as `#name`, so any value other
        # than an explicit `False` counts as present.
        return "usemap" in node and node["usemap"] is not False

    if is_element(node, "input"):
        # A missing `type` is not the Hidden state, so the input is interactive
        return "type" not in node or str(node["type"]).lower() != "hidden"

    return is_element(
        node, "button", "details", "embed", "iframe", "label", "select", "textarea"
    )

Check if the element is intended for user interaction.

Conditions:

  • a (if the href attribute is present)
  • audio (if the controls attribute is present)
  • button, details, embed, iframe, img (if the usemap attribute is present)
  • input (if the type attribute is not in the Hidden state)
  • label, select, textarea, video (if the controls attribute is present)
Returns

True if element is interactive

def is_phrasing(node: phml.nodes.Element) -> bool:
def is_phrasing(node: Element) -> bool:
    """Check if a node is phrasing text according to
    https://html.spec.whatwg.org/#phrasing-content-2.

    Phrasing content is the text of the document, as well as elements that mark up that text at the
    intra-paragraph level. Runs of phrasing content form paragraphs.

    * text (true)
    * area (if its direct parent is a map element)
    * meta (if the itemprop attribute is present)
    * link (if the itemprop attribute is present, or if `rel` holds at least one keyword and
     every keyword is allowed in the body)
    * map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn,
     em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output,
     picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg,
     template, textarea, time, u, var, video, wbr (true)

    Args:
        node (Element): The node to inspect.

    Returns:
        True if the element is phrasing text
    """

    if Literal.is_text(node):
        return True

    if is_element(node, "area"):
        return node.parent is not None and is_element(node.parent, "map")

    if is_element(node, "meta"):
        return "itemprop" in node

    if is_element(node, "link"):
        if "itemprop" in node:
            return True
        if "rel" not in node:
            return False

        body_ok = {
            "dns-prefetch", "modulepreload", "pingback", "preconnect",
            "prefetch", "preload", "prerender", "stylesheet",
        }

        # `split()` without an argument splits on any whitespace and drops
        # empty tokens.
        tokens = str(node["rel"]).lower().split()
        # An empty `rel` must not pass vacuously through `all()`.
        return bool(tokens) and all(token in body_ok for token in tokens)

    # NOTE(review): "node" is not an HTML tag and is absent from the list in
    # the docstring; kept so existing behavior is unchanged - confirm intent.
    phrasing_tags = (
        "node", "map", "mark", "math", "audio", "b", "bdi", "bdo", "br",
        "button", "canvas", "cite", "code", "data", "datalist", "del", "dfn",
        "em", "embed", "i", "iframe", "img", "input", "ins", "kbd", "label",
        "a", "abbr", "meter", "noscript", "object", "output", "picture",
        "progress", "q", "ruby", "s", "samp", "script", "select", "slot",
        "small", "span", "strong", "sub", "sup", "svg", "template",
        "textarea", "time", "u", "var", "video", "wbr",
    )

    return is_element(node, *phrasing_tags)

Check if a node is phrasing text according to https://html.spec.whatwg.org/#phrasing-content-2.

Phrasing content is the text of the document, as well as elements that mark up that text at the intra-paragraph level. Runs of phrasing content form paragraphs.

  • area (if it is a descendant of a map element)
  • link (if it is allowed in the body)
  • meta (if the itemprop attribute is present)
  • map, mark, math, audio, b, bdi, bdo, br, button, canvas, cite, code, data, datalist, del, dfn, em, embed, i, iframe, img, input, ins, kbd, label, a, abbr, meter, noscript, object, output, picture, progress, q, ruby, s, samp, script, select, slot, small, span, strong, sub, sup, svg, template, textarea, time, u, var, video, wbr, text (true)
Returns

True if the element is phrasing text

def is_event_handler(attribute: str) -> bool:
def is_event_handler(attribute: str) -> bool:
    """Check whether an attribute name looks like an event handler.

    Args:
        attribute (str): The attribute name to check.

    Returns:
        bool: `True` if the name starts with `on` and its length is `5`
            or more.
    """
    if not attribute.startswith("on"):
        return False
    return len(attribute) >= 5

Takes an attribute name and returns true if it starts with on and its length is 5 or more.

def blank(value: Any) -> bool:
def blank(value: Any) -> bool:
    """Report whether a value is blank.

    `None` and any object without a `__len__` attribute are blank. A string
    is blank when nothing remains after stripping whitespace. Anything else
    is blank when its length is `0`.

    Args:
        value (Any): The value to check if it is blank.

    Returns:
        bool: True if value is blank
    """

    if value is None:
        return True

    # Objects that have no length are treated as blank
    if not hasattr(value, "__len__"):
        return True

    measured = value.strip() if isinstance(value, str) else value
    return len(measured) == 0

Takes any value type and returns whether it is blank/None. For strings if the value is stripped and is equal to '' then it is blank. Otherwise if len > 0 and is not None then not blank.

Args
  • value (Any): The value to check if it is blank.
Returns

bool: True if value is blank