Coverage for phml\utilities\locate\select.py: 100%
214 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-05 15:06 -0500
« prev ^ index » next coverage.py v6.5.0, created at 2023-04-05 15:06 -0500
1"""utilities.select
3A collection of utilities around querying for specific
4types of data.
5"""
7# PERF: Support for all `:` selectors from https://www.w3schools.com/cssref/css_selectors.php
8# - Strip all `::` selectors and `:` not supported by phml implementation
9# - This will allow for parsing of css selectors and adding scoping to component style elements
10# Add a data-phml-style-scope attribute to matching elements in the components. Edit the selector to then
11# have :is([data-phml-style-scope="phml-<hash>"])<selector>
13import re
14from typing import Callable
16from phml.nodes import Element, Node, Parent
17from phml.utilities.travel.travel import walk
19__all__ = ["query", "query_all", "matches", "parse_specifiers"]
def query(tree: Parent, specifier: str) -> Element | None:
    """Return the first element matching a CSS-like specifier.

    Works like JavaScript's `querySelector`. `#` marks an id and `.` marks a
    class; used without a tag name they match any tag. A tag may be combined
    with any number of classes but only one id. Specifiers may be chained with
    whitespace (descendant) or with the combinators listed below.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = the node immediately after the current element, if it is an element
    * `~` = element siblings after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` nested inside a `div` with the class `form-control`.

    Args:
        tree (Parent): The node the search starts from.
        specifier (str): The CSS-like specifier to look for.

    Return:
        Element | None: The first element matching the specifier or None if no
        element was found.
    """

    def first_hit(candidates, rules: list):
        """Evaluate `branch` on each candidate, stopping at the first non-None result."""
        for candidate in candidates:
            found = branch(candidate, rules)
            if found is not None:
                return found
        return None

    def all_nodes(current: Parent, rules: list, include_self: bool = True):
        """Search every element yielded by walking `current`."""
        return first_hit(
            (
                node
                for node in walk(current)
                if isinstance(node, Element) and (include_self or node != current)
            ),
            rules,
        )

    def all_children(current: Parent, rules: list):
        """Search only the direct element children of `current`."""
        return first_hit(
            (child for child in current if isinstance(child, Element)),
            rules,
        )

    def first_sibling(node: Parent, rules: list):
        """Check only the node directly following `node` in its parent."""
        parent = node.parent
        if parent is None:
            return None

        following = parent.index(node) + 1
        if following >= len(parent):
            return None

        neighbour = parent[following]
        if isinstance(neighbour, Element):
            return branch(neighbour, rules)
        return None

    def all_siblings(current: Parent, rules: list):
        """Search every element sibling positioned after `current`."""
        parent = current.parent
        if parent is None:
            return None

        start = parent.index(current) + 1
        later = (parent[position] for position in range(start, len(parent)))
        return first_hit(
            (sibling for sibling in later if isinstance(sibling, Element)),
            rules,
        )

    def process_dict(rules: list, node: Element):
        """Handle an element rule: the node must satisfy it before continuing."""
        if not is_equal(rules[0], node):
            return None

        if len(rules) == 1:
            # Last rule satisfied: this node is the answer.
            return node

        upcoming = rules[1]
        if isinstance(upcoming, dict):
            # Two element rules in a row mean "descendant of".
            return all_nodes(node, rules[1:], False)
        if upcoming == "*":
            # `*` after an element rule: skip it and search the descendants.
            return all_nodes(node, rules[2:], False)
        return branch(node, rules[1:])

    # Combinator token -> search strategy; each is called with (node, remaining rules).
    combinators = {
        "*": all_nodes,
        ">": all_children,
        "+": first_sibling,
        "~": all_siblings,
    }

    def branch(node: Node, rules: list):
        """Apply the first rule to `node` and recurse with the remaining rules.

        When no rules remain the node itself is the match.
        """
        if not isinstance(node, Parent):
            return None

        if not rules:
            return node

        head = rules[0]
        if isinstance(head, dict):
            # Element rules only apply to elements; any other parent is no match.
            return process_dict(rules, node) if isinstance(node, Element) else None

        handler = combinators.get(head)
        if handler is None:
            return None
        return handler(node, rules[1:])

    return all_nodes(tree, parse_specifiers(specifier))
def query_all(tree: Parent, specifier: str) -> list[Element]:
    """Return every element matching a CSS-like specifier.

    Works like JavaScript's `querySelectorAll`. `#` marks an id and `.` marks a
    class; used without a tag name they match any tag. A tag may be combined
    with any number of classes but only one id. Specifiers may be chained with
    whitespace (descendant) or with the combinators listed below.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = the node immediately after the current element, if it is an element
    * `~` = element siblings after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches all elements with the class `some-example`
    * `#some-example` matches all elements with the id `some-example`
    * `li` matches all `li` elements
    * `li.red` matches all `li` elements with the class `red`
    * `li#red` matches all `li` elements with the id `red`
    * `input[type="checkbox"]` matches all `input` elements with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches all `input` elements with the
    attribute `type="checkbox"` nested inside a `div` with the class `form-control`.

    Args:
        tree (Parent): The node the search starts from.
        specifier (str): The CSS-like specifier to look for.

    Return:
        list[Element]: All elements matching the specifier, each listed once in
        the order it was first found, or an empty list if nothing matched.
    """

    def all_nodes(current: Parent, rules: list, include_self: bool = True):
        """Collect matches from every element yielded by walking `current`."""
        results = []
        for node in walk(current):
            if isinstance(node, Element) and (include_self or node != current):
                results.extend(branch(node, rules) or [])
        return results

    def all_children(current: Parent, rules: list):
        """Collect matches from the direct element children of `current`."""
        results = []
        for node in current:
            if isinstance(node, Element):
                results.extend(branch(node, rules) or [])
        return results

    def first_sibling(node: Parent, rules: list):
        """Collect matches from the node directly following `node`."""
        if node.parent is None:
            return []

        idx = node.parent.index(node)
        if idx + 1 < len(node.parent) and node.parent[idx + 1].type == "element":
            return branch(node.parent[idx + 1], rules) or []
        return []

    def all_siblings(current: Parent, rules: list):
        """Collect matches from every element sibling after `current`."""
        if current.parent is None:
            return []

        results = []
        siblings = current.parent
        for position in range(siblings.index(current) + 1, len(siblings)):
            if siblings[position].type == "element":
                results.extend(branch(siblings[position], rules) or [])
        return results

    def process_dict(rules: list, node: Element):
        """Handle an element rule: the node must satisfy it before continuing."""
        if not is_equal(rules[0], node):
            return []

        if len(rules) == 1:
            return [node]

        if isinstance(rules[1], dict):
            # Descendant relationship: the node that matched the ancestor rule
            # must not be offered as its own descendant (mirrors `query`).
            return all_nodes(node, rules[1:], False)
        if rules[1] == "*":
            # `*` after an element rule: skip it and search the descendants.
            return all_nodes(node, rules[2:], False)
        return branch(node, rules[1:])

    def branch(node: Node, rules: list):
        """Apply the first rule to `node` and recurse with the remaining rules.

        When no rules remain the node itself is a match.
        """
        if not isinstance(node, Parent):
            return []

        if len(rules) == 0:
            return [node]

        if isinstance(rules[0], dict) and isinstance(node, Element):
            return process_dict(rules, node)

        if rules[0] == "*":
            return all_nodes(node, rules[1:])

        if rules[0] == ">":
            return all_children(node, rules[1:])

        if rules[0] == "+":
            return first_sibling(node, rules[1:])

        if rules[0] == "~":
            return all_siblings(node, rules[1:])

        return []

    matched = all_nodes(tree, parse_specifiers(specifier))

    # The same element can be reached through several matching ancestors
    # (e.g. `div p` with nested divs). Keep the first occurrence of each object;
    # identity is used so distinct elements that compare equal are all kept.
    unique = []
    seen = set()
    for element in matched:
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique
def matches(node: Element, specifier: str) -> bool:
    """Check whether a single element satisfies a simple specifier.

    Works like JavaScript's `matches`. `#` marks an id and `.` marks a class;
    used without a tag name they match any tag. A tag may be combined with any
    number of classes but only one id. Complex specifiers are not supported:
    everything in the specifier must relate to one element/tag.

    Rules:
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the element with the class `some-example`
    * `#some-example` matches the element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches the `input` element with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): A specifier describing exactly one element.

    Returns:
        bool: Whether the element satisfies the specifier.

    Raises:
        ValueError: If the specifier contains no rules (empty or whitespace only).
        Exception: If the specifier has more than one rule or is a bare combinator.
    """
    rules = parse_specifiers(specifier)

    if not rules:
        # Without this guard `rules[0]` below fails with an unhelpful IndexError.
        raise ValueError(f"Specifier must not be empty.\n{specifier!r}")

    if len(rules) > 1:
        raise Exception(f"Complex specifier detected and is not allowed.\n{specifier}")

    if not isinstance(rules[0], dict):
        raise Exception(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`",
        )

    return is_equal(rules[0], node)
def is_equal(rule: dict, node: Node) -> bool:
    """Tell whether a node satisfies every constraint in a parsed rule.

    A rule is a dictionary with the keys `tag`, `id`, `classList` and
    `attributes`:
    * `tag`: must equal the node's tag unless it is `*`
    * `id`: when not None, the node must have an `id` equal to it
    * `classList`: every listed class must appear in the node's space separated
        `class` attribute
    * `attributes`: every entry must name an attribute present on the node and
        pass its comparison. The possible comparisons are:
        1. Exists: `[checked]`
        2. Equals: `[checked='no']`
        3. Contains: `[class~=sample]` or `[class*=sample]`
        4. Equal to or starts with value-: `[class|=sample]`
        5. Starts with: `[class^=sample]`
        6. Ends with: `[class$="sample"]`

    Args:
        rule (dict): The rule to apply to the node.
        node (Element): The node to validate.

    Returns:
        bool: Whether the node passes all the constraints in the rule.
    """
    # Tag
    wanted_tag = rule["tag"]
    if wanted_tag != "*" and wanted_tag != node.tag:
        return False

    # Id
    wanted_id = rule["id"]
    if wanted_id is not None:
        if "id" not in node or node["id"] != wanted_id:
            return False

    # Classes
    wanted_classes = rule["classList"]
    if wanted_classes:
        if "class" not in node:
            return False
        present = str(node["class"]).split(" ")
        if any(klass not in present for klass in wanted_classes):
            return False

    # Attributes
    for attr in rule["attributes"]:
        if attr["name"] not in node.attributes or not __validate_attr(attr, node):
            return False

    return True
def compare_equal(attr: str, c_value: str) -> bool:
    """Return True when the attribute value is exactly the expected value."""
    is_same = attr == c_value
    return is_same
def compare_equal_or_start_with_value_dash(attr: str, c_value: str) -> bool:
    """Return True when the attribute value equals the expected value or
    begins with the expected value followed by a dash (the `|=` comparison).
    """
    if attr == c_value:
        return True
    dashed_prefix = f"{c_value}-"
    return attr.startswith(dashed_prefix)
def compare_startswith(attr: str, c_value: str) -> bool:
    """Return True when the attribute value begins with the expected value
    (the `^=` comparison).
    """
    has_prefix = attr.startswith(c_value)
    return has_prefix
def compare_endswith(attr: str, c_value: str) -> bool:
    """Return True when the attribute value finishes with the expected value
    (the `$=` comparison).
    """
    has_suffix = attr.endswith(c_value)
    return has_suffix
def compare_contains(attr: str, c_value: str) -> bool:
    """Return True when the expected value occurs anywhere inside the
    attribute value (the `*=` and `~=` comparisons).
    """
    is_present = c_value in attr
    return is_present
def compare_exists(attr: str, _) -> bool:
    """Return True when an attribute counts as present (the `[attribute]` rule).

    This is only reached for attributes that are set on the node, so any value
    means "present". The one exception is the string `"false"`, which is what a
    boolean `False` attribute becomes once stringified and lowercased; that is
    treated as absent.

    The previous check (`attr == "true"`) rejected every ordinary value, so
    selectors such as `[href]` or `[class]` could never match.

    NOTE(review): a literal string value of `"false"` cannot be told apart from
    a boolean `False` here and is therefore also treated as absent.

    Args:
        attr (str): The attribute value, with booleans already stringified.
        _: Unused comparison value, kept for a uniform validator signature.

    Returns:
        bool: Whether the attribute counts as present.
    """
    return attr != "false"
def __validate_attr(attr: dict, node: Element) -> bool:
    """Check one parsed attribute rule against the node's attribute value.

    Boolean attribute values are converted to the lowercase strings `"true"` /
    `"false"` before being compared.

    A missing comparator or value may arrive either as `""` or as `None`
    depending on which parser produced the rule; both are treated as "not
    given" so that a bare `[attribute]` rule is always an existence check.

    Args:
        attr (dict): Rule with the keys `name`, `compare` and `value`.
        node (Element): The node whose attribute is validated. The attribute
            named by the rule must exist on the node.

    Returns:
        bool: Whether the attribute passes the comparison. Unknown comparators,
        or a value given without a comparator, yield False.
    """
    attribute = node[attr["name"]]
    if isinstance(attribute, bool):
        attribute = str(attribute).lower()

    # Comparator token -> validator; `*=` and `~=` are both "contains".
    validators = {
        "=": compare_equal,
        "|=": compare_equal_or_start_with_value_dash,
        "^=": compare_startswith,
        "$=": compare_endswith,
        "*=": compare_contains,
        "~=": compare_contains,
    }

    comparison = attr["compare"]
    if comparison:
        validator = validators.get(comparison)
    elif not attr["value"]:
        # Neither comparator nor value: `[attribute]` existence check.
        validator = compare_exists
    else:
        validator = None

    if validator is None:
        return False

    return is_valid_attr(
        attr=attribute,
        sub=attr["value"],
        name=attr["name"],
        validator=validator,
    )
def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value against an expected string with a validator.

    For list-like attributes (currently only `class`) the value is split on
    single spaces and each item is checked separately; any other attribute is
    checked as one whole value.

    Args:
        attr (str): The attribute value found on the node.
        sub (str): The expected value from the specifier.
        name (str): The attribute name, used to decide whether to split.
        validator (Callable): Called as `validator(item, sub)` for each item.

    Returns:
        True if at least one item passes the validator.
    """
    list_attributes = ("class",)

    candidates = attr.split(" ") if name in list_attributes else [attr]

    # `any` stops at the first passing item instead of building a full list.
    return any(validator(candidate, sub) for candidate in candidates)
def __parse_el_with_attribute(
    tag: str | None, context: str | None, attributes: str | None
) -> dict:
    """Build an element rule from the parts of one specifier token.

    Args:
        tag (str | None): The tag name; `*` is used when it is missing.
        context (str | None): The class/id part, e.g. `.red#main`.
        attributes (str | None): The attribute part, e.g. `[type="checkbox"][checked]`.

    Returns:
        dict: A rule with the keys `tag`, `classList`, `id` and `attributes`.
        Each attribute is a dict with `name`, `compare` and `value`; a missing
        comparator or value is the empty string.

    Raises:
        Exception: If the context contains more than one id.
    """
    class_or_id = re.compile(r"(#|\.)([\w\-]+)")

    attr_compare_val = re.compile(
        r"\[\s*([\w\-:@]+)\s*([\~\|\^\$\*]?=)?\s*(\"[^\"\[\]=]*\"|\'[^\'\[\]=]*\'|[^\s\[\]=\"']+)?\s*\]"
    )

    element = {
        "tag": tag or "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if attributes is not None:
        # `findall` yields "" (never None) for groups that did not take part.
        for name, compare, value in attr_compare_val.findall(attributes):
            element["attributes"].append(
                {
                    "name": name,
                    "compare": compare,
                    "value": value.strip("'\""),
                },
            )

    if context is not None:
        for part in class_or_id.finditer(context):
            marker, identifier = part.groups()
            if marker == ".":
                # Repeated classes are recorded once.
                if identifier not in element["classList"]:
                    element["classList"].append(identifier)
            elif element["id"] is None:
                element["id"] = identifier
            else:
                raise Exception(
                    f"There may only be one id per element specifier. '{(tag or '') + (context or '')}{attributes or ''}'",
                )
    return element
def __parse_attr_only_element(token: str) -> dict:
    """Build an element rule from a token made only of attribute selectors.

    The rule matches any tag (`*`) and has no classes or id.

    Args:
        token (str): One or more bracketed selectors, e.g. `[type="text"][required]`.

    Returns:
        dict: A rule with the keys `tag`, `classList`, `id` and `attributes`.
        Each attribute is a dict with `name`, `compare` and `value`; a missing
        comparator or value is the empty string, the same shape that
        `__parse_el_with_attribute` produces.
    """
    # Unquoted values must not contain brackets; otherwise `[a=b][c]` is read
    # as a single attribute whose value is `b][c`.
    attr_compare_val = re.compile(
        r"\[([a-zA-Z0-9_:\-]+)([~|^$*]?=)?(\"[^\"]+\"|'[^']+'|[^'\"\[\]]+)?\]"
    )

    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if token:
        for found in attr_compare_val.finditer(token):
            name, compare, value = found.groups()
            element["attributes"].append(
                {
                    "name": name,
                    # Groups that did not take part are None; normalize to "".
                    "compare": compare or "",
                    "value": (value or "").strip("'\""),
                },
            )

    return element
def parse_specifiers(specifier: str) -> list:
    """Split a specifier into an ordered list of rules.

    Combinators (`*`, `>`, `+`, `~`) are returned as plain strings. Every other
    token becomes an element rule dict with the keys `tag`, `classList`, `id`
    and `attributes`. Whitespace only separates tokens and produces no rule.

    Combinators are recognized whether or not they are surrounded by
    whitespace, so `div>p` and `div > p` give the same rules.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Args:
        specifier (str): The CSS-like specifier to parse.

    Returns:
        list: Combinator strings and element rule dicts in source order.
    """
    splitter = re.compile(
        # 1: a combinator on its own
        r"([~>*+])"
        # 2: a token made only of attribute selectors
        r"|((?:\[[^\[\]]+\])+)"
        # 3-5: optional tag, optional classes/id, optional attribute selectors.
        # Combinator characters are excluded from names so that `div>p` is not
        # read as a single tag.
        r"|([^.#\[\]\s~>*+]+)?((?:[.#][^.#\[\]\s~>*+]+)+)?((?:\[[^\[\]]+\])+)?"
    )

    tokens = []
    for found in splitter.finditer(specifier):
        combinator, just_attributes, tag, context, attributes = found.groups()
        if combinator is not None:
            tokens.append(combinator)
        elif tag is not None or context is not None or attributes is not None:
            tokens.append(__parse_el_with_attribute(tag, context, attributes))
        elif just_attributes is not None:
            tokens.append(__parse_attr_only_element(just_attributes))
        # Otherwise this was an empty match (e.g. at whitespace): nothing to add.
    return tokens