Coverage for phml\utilities\locate\select.py: 100%

214 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-05 15:06 -0500

1"""utilities.select 

2 

3A collection of utilities around querying for specific 

4types of data. 

5""" 

6 

7# PERF: Support for all `:` selectors from https://www.w3schools.com/cssref/css_selectors.php 

8# - Strip all `::` selectors and `:` not supported by phml implementation 

9# - This will allow for parsing of css selectors and adding scoping to component style elements

10# Add a data-phml-style-scope attribute to matching elements in the components. Edit the selector to then 

11# have :is([data-phml-style-scope="phml-<hash>"])<selector> 

12 

13import re 

14from typing import Callable 

15 

16from phml.nodes import Element, Node, Parent 

17from phml.utilities.travel.travel import walk 

18 

19__all__ = ["query", "query_all", "matches", "parse_specifiers"] 

20 

21 

def query(tree: Parent, specifier: str) -> Element | None:
    """Return the first element in `tree` matching `specifier`, like JavaScript's `querySelector`.

    A specifier is a sequence of element rules and combinators. An element rule is
    an optional tag name followed by any number of `.class` parts, at most one `#id`
    part and any number of `[attribute]` parts. Two element rules separated by
    whitespace describe an ancestor followed by one of its descendants.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = the next sibling element of the current element
    * `~` = sibling elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the first element with the class `some-example`
    * `#some-example` matches the first element with the id `some-example`
    * `li` matches the first `li` element
    * `li.red` matches the first `li` with the class `red`
    * `li#red` matches the first `li` with the id `red`
    * `input[type="checkbox"]` matches the first `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches the first `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree (Parent): The node whose elements, as produced by `walk`, are searched.
        specifier (str): The selector; it is tokenized with `parse_specifiers`.

    Returns:
        Element | None: The first element matching the specifier or None if no element was
        found.
    """

    def descendants(current: Parent, include_self: bool = True):
        """Yield every element produced by `walk(current)`, optionally skipping `current`."""
        for node in walk(current):
            # Identity, not equality: only the starting node itself is ever excluded.
            if isinstance(node, Element) and (include_self or node is not current):
                yield node

    def child_elements(current: Parent):
        """Yield the direct children of `current` that are elements."""
        return (node for node in current if isinstance(node, Element))

    def following_elements(current: Parent):
        """Yield the element siblings that come after `current` under its parent."""
        if current.parent is None:
            return
        passed_current = False
        for sibling in current.parent:
            if passed_current:
                if isinstance(sibling, Element):
                    yield sibling
            elif sibling is current:
                # Located by identity so an equal-looking earlier sibling is never
                # mistaken for `current`.
                passed_current = True

    def first_match(candidates, rules: list):
        """Return the first non-None result of applying `rules` to each candidate."""
        for candidate in candidates:
            found = branch(candidate, rules)
            if found is not None:
                return found
        return None

    def branch(node: Node, rules: list):
        """Apply the leading rule to `node` and recurse on the remaining rules.

        Returns the matched element once every rule is consumed, otherwise None.
        """
        if not isinstance(node, Parent):
            return None
        if not rules:
            return node

        head, rest = rules[0], rules[1:]

        if isinstance(head, dict):
            if not isinstance(node, Element) or not is_equal(head, node):
                return None
            if not rest:
                return node
            if isinstance(rest[0], dict):
                # Whitespace between two element rules: search strict descendants.
                return first_match(descendants(node, include_self=False), rest)
            if rest[0] == "*":
                return first_match(descendants(node, include_self=False), rest[1:])
            return branch(node, rest)

        if head == "*":
            return first_match(descendants(node), rest)
        if head == ">":
            return first_match(child_elements(node), rest)
        if head == "+":
            # Text and comment nodes between two elements do not break adjacency.
            neighbour = next(following_elements(node), None)
            return None if neighbour is None else branch(neighbour, rest)
        if head == "~":
            return first_match(following_elements(node), rest)
        return None

    return first_match(descendants(tree), parse_specifiers(specifier))

148 

149 

def query_all(tree: Parent, specifier: str) -> list[Element]:
    """Return every element in `tree` matching `specifier`, like JavaScript's `querySelectorAll`.

    A specifier is a sequence of element rules and combinators. An element rule is
    an optional tag name followed by any number of `.class` parts, at most one `#id`
    part and any number of `[attribute]` parts. Two element rules separated by
    whitespace describe an ancestor followed by one of its descendants.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = the next sibling element of the current element
    * `~` = sibling elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches every element with the class `some-example`
    * `#some-example` matches every element with the id `some-example`
    * `li` matches every `li` element
    * `li.red` matches every `li` with the class `red`
    * `li#red` matches every `li` with the id `red`
    * `input[type="checkbox"]` matches every `input` with the attribute `type="checkbox"`
    * `div.form-control input[type="checkbox"]` matches every `input` with the
    attribute `type="checkbox"` that is nested inside a `div` with the class `form-control`.

    Args:
        tree (Parent): The node whose elements, as produced by `walk`, are searched.
        specifier (str): The selector; it is tokenized with `parse_specifiers`.

    Returns:
        list[Element]: Every matching element, listed once each in the order it was
        first found, or an empty list if no elements were found.
    """

    def descendants(current: Parent, include_self: bool = True):
        """Yield every element produced by `walk(current)`, optionally skipping `current`."""
        for node in walk(current):
            # Identity, not equality: only the starting node itself is ever excluded.
            if isinstance(node, Element) and (include_self or node is not current):
                yield node

    def child_elements(current: Parent):
        """Yield the direct children of `current` that are elements."""
        return (node for node in current if isinstance(node, Element))

    def following_elements(current: Parent):
        """Yield the element siblings that come after `current` under its parent."""
        if current.parent is None:
            return
        passed_current = False
        for sibling in current.parent:
            if passed_current:
                if isinstance(sibling, Element):
                    yield sibling
            elif sibling is current:
                # Located by identity so an equal-looking earlier sibling is never
                # mistaken for `current`.
                passed_current = True

    def collect(candidates, rules: list) -> list:
        """Concatenate the results of applying `rules` to each candidate."""
        found = []
        for candidate in candidates:
            found.extend(branch(candidate, rules))
        return found

    def branch(node: Node, rules: list) -> list:
        """Apply the leading rule to `node` and recurse on the remaining rules.

        Returns the elements reached once every rule is consumed; an empty list
        when the rules cannot be satisfied from `node`.
        """
        if not isinstance(node, Parent):
            return []
        if not rules:
            return [node]

        head, rest = rules[0], rules[1:]

        if isinstance(head, dict):
            if not isinstance(node, Element) or not is_equal(head, node):
                return []
            if not rest:
                return [node]
            if isinstance(rest[0], dict):
                # Whitespace between two element rules means a strict descendant, so
                # the ancestor itself must not be offered as its own descendant.
                return collect(descendants(node, include_self=False), rest)
            if rest[0] == "*":
                return collect(descendants(node, include_self=False), rest[1:])
            return branch(node, rest)

        if head == "*":
            return collect(descendants(node), rest)
        if head == ">":
            return collect(child_elements(node), rest)
        if head == "+":
            # Text and comment nodes between two elements do not break adjacency.
            neighbour = next(following_elements(node), None)
            return [] if neighbour is None else branch(neighbour, rest)
        if head == "~":
            return collect(following_elements(node), rest)
        return []

    # The same element can be reached through several ancestors or siblings; keep
    # the first occurrence only. Identity is used so that two distinct elements
    # that merely look alike are both kept.
    unique: list[Element] = []
    seen: set[int] = set()
    for element in collect(descendants(tree), parse_specifiers(specifier)):
        if id(element) not in seen:
            seen.add(id(element))
            unique.append(element)
    return unique

271 

272 

def matches(node: Element, specifier: str) -> bool:
    """Check a single element against a specifier, like JavaScript's `matches`.

    The specifier must describe exactly one element: an optional tag name followed
    by any number of `.class` parts, at most one `#id` part and any number of
    `[attribute]` parts. Complex specifiers (combinators or several element rules)
    are not supported.

    Rules:
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Examples:
    * `.some-example` matches the element with the class `some-example`
    * `#some-example` matches the element with the id `some-example`
    * `li` matches an `li` element
    * `li.red` matches an `li` with the class `red`
    * `li#red` matches an `li` with the id `red`
    * `input[type="checkbox"]` matches the `input` element with the attribute `type="checkbox"`

    Args:
        node (Element): The element to test.
        specifier (str): The selector; it is tokenized with `parse_specifiers`.

    Returns:
        bool: The result of `is_equal` for the single parsed rule and `node`.

    Raises:
        ValueError: If the specifier parses to more than one token, to no token at
            all (for example an empty string), or to a combinator instead of an
            element rule.
    """
    rules = parse_specifiers(specifier)

    if len(rules) > 1:
        raise ValueError(f"Complex specifier detected and is not allowed.\n{specifier}")
    # `not rules` covers specifiers that produce no token, which would otherwise
    # surface as an unexplained IndexError on `rules[0]`.
    if not rules or not isinstance(rules[0], dict):
        raise ValueError(
            "Specifier must only include tag name, classes, id, and or attribute specfiers.\n"
            "Example: `li.red#sample[class^='form-'][title~='sample']`",
        )

    return is_equal(rules[0], node)

312 

313 

def is_equal(rule: dict, node: Node) -> bool:
    """Check whether a node satisfies every part of a parsed element rule.

    A rule is a dictionary with the keys `tag`, `id`, `classList` and `attributes`:
    * `tag`: must equal the node's tag unless it is `*`
    * `id`: when not None, the node must have an `id` equal to it
    * `classList`: every listed class must appear in the node's whitespace separated
      `class` attribute
    * `attributes`: every entry names an attribute that must be present on the node
      and pass the entry's comparison. The comparisons are:
        1. Exists: `[checked]`, decided by `compare_exists`
        2. Equals: `[checked='no']` yields any element with `checked='no'`
        3. Contains: `[class~=sample]` or `[class*=sample]` yields any element with a class
            containing `sample`
        4. Equal to or starts with value-: `[class|=sample]` yields elements that either have
            a class that equals `sample` or a class that starts with `sample-`
        5. Starts with: `[class^=sample]` yields elements with a class that starts with `sample`
        6. Ends with: `[class$="sample"]` yields elements with a class that ends with `sample`

    Args:
        rule (dict): The rule to apply to the node.
        node (Node): The node to validate.

    Returns:
        bool: Whether the node passes all the checks in the rule. Nodes that are not
        elements never pass.
    """
    # Only elements carry a tag and attributes; without this guard a bare `*` rule
    # would accept any node and any other rule would fail on `node.tag`.
    if not isinstance(node, Element):
        return False

    # Validate tag
    if rule["tag"] != "*" and rule["tag"] != node.tag:
        return False

    # Validate id
    if rule["id"] is not None and ("id" not in node or rule["id"] != node["id"]):
        return False

    # Validate class list
    if rule["classList"]:
        if "class" not in node:
            return False
        # `split()` treats any run of whitespace (spaces, tabs, newlines) as one
        # separator and never produces empty class names.
        present = set(str(node["class"]).split())
        if any(klass not in present for klass in rule["classList"]):
            return False

    # Validate all attributes; an empty attribute list passes trivially.
    return all(
        attr["name"] in node.attributes and __validate_attr(attr, node)
        for attr in rule["attributes"]
    )

366 

367 

def compare_equal(attr: str, c_value: str) -> bool:
    """Return whether the attribute value `attr` is exactly `c_value`."""
    is_same = attr == c_value
    return is_same

370 

371 

def compare_equal_or_start_with_value_dash(attr: str, c_value: str) -> bool:
    """Return whether `attr` is `c_value` itself or begins with `c_value` plus a dash."""
    if attr == c_value:
        return True
    dashed_prefix = f"{c_value}-"
    return attr.startswith(dashed_prefix)

374 

375 

def compare_startswith(attr: str, c_value: str) -> bool:
    """Return whether the attribute value `attr` begins with `c_value`."""
    has_prefix = attr.startswith(c_value)
    return has_prefix

378 

379 

def compare_endswith(attr: str, c_value: str) -> bool:
    """Return whether the attribute value `attr` ends with `c_value`."""
    has_suffix = attr.endswith(c_value)
    return has_suffix

382 

383 

def compare_contains(attr: str, c_value: str) -> bool:
    """Return whether `c_value` occurs anywhere inside the attribute value `attr`."""
    is_inside = c_value in attr
    return is_inside

386 

387 

def compare_exists(attr: str, _) -> bool:
    """Return whether an attribute that is present on an element counts as existing.

    `attr` is the attribute's value as text; `__validate_attr` converts boolean
    values to `"true"` / `"false"` before calling this. Any value other than
    `"false"` counts as existing, so `[type]` accepts `type="checkbox"` as well as
    a boolean attribute that is switched on.

    Args:
        attr (str): The attribute value to inspect.
        _: Unused; kept so every comparator shares the same two-argument signature.

    Returns:
        bool: False only when the value is `"false"`.
    """
    # NOTE(review): a literal string value "false" is indistinguishable here from a
    # boolean False that was turned into text by the caller; both are rejected.
    return attr != "false"

390 

391 

def __validate_attr(attr: dict, node: Element) -> bool:
    """Check one parsed attribute selector against the matching attribute of `node`.

    Args:
        attr (dict): A parsed selector with the keys `name`, `compare` and `value`.
            `compare` and `value` may be an empty string or None when the selector
            only names the attribute, as in `[checked]`.
        node (Element): The element to inspect; the caller has already verified
            that it has an attribute called `attr["name"]`.

    Returns:
        bool: Whether the attribute passes the comparison. Unknown comparators and
        a value given without a comparator never pass.
    """
    attribute = node[attr["name"]]
    if isinstance(attribute, bool):
        # Boolean attributes are compared through their lower-case text form.
        attribute = str(attribute).lower()

    # The two selector parsers disagree on "" versus None for a missing part;
    # treat both the same so a bare `[attr]` selector works from either of them.
    compare = attr["compare"] or ""
    value = attr["value"] or ""

    validators = {
        "=": compare_equal,
        "|=": compare_equal_or_start_with_value_dash,
        "^=": compare_startswith,
        "$=": compare_endswith,
        "*=": compare_contains,
        "~=": compare_contains,
    }

    if compare == "":
        if value != "":
            return False
        validator = compare_exists
    else:
        validator = validators.get(compare)
        if validator is None:
            return False

    return is_valid_attr(
        attr=attribute,
        sub=value,
        name=attr["name"],
        validator=validator,
    )

444 

445 

def is_valid_attr(attr: str, sub: str, name: str, validator: Callable) -> bool:
    """Validate an attribute value against `sub` using a validator callable.

    The `class` attribute is treated as a whitespace separated list and passes when
    any single class satisfies the validator. Every other attribute is checked as
    one whole value.

    Args:
        attr (str): The attribute value found on the element.
        sub (str): The value from the selector to compare with.
        name (str): The attribute name; decides whether `attr` is split into items.
        validator (Callable): Called as `validator(item, sub)` for each item.

    Returns:
        bool: True if at least one item satisfies the validator.
    """
    list_attributes = ["class"]

    if name in list_attributes:
        # `split()` accepts any whitespace between classes and yields no empty items.
        compare_values = attr.split()
    else:
        compare_values = [attr]

    return any(validator(item, sub) for item in compare_values)

462 

463 

# One `[name<comparator>value]` attribute selector. The comparator and the value
# are optional. A value is either quoted (any characters except brackets and its
# own quote character) or a bare run of characters without quotes or brackets.
_ATTRIBUTE_SELECTOR = re.compile(
    r"\[\s*([\w\-:@]+)\s*([~|^$*]?=)?\s*"
    r"(\"[^\"\[\]]*\"|'[^'\[\]]*'|[^\[\]\"']+?)?\s*\]"
)


def _parse_attribute_selectors(text: str) -> list[dict]:
    """Parse every `[...]` attribute selector found in `text`.

    Returns:
        list[dict]: One dictionary per selector with the keys `name`, `compare` and
        `value`. A missing comparator or value is an empty string, and quotes
        around the value are removed.
    """
    parsed = []
    for found in _ATTRIBUTE_SELECTOR.finditer(text):
        name, compare, value = found.groups(default="")
        parsed.append(
            {
                "name": name,
                "compare": compare,
                "value": value.strip("'\""),
            },
        )
    return parsed


def __parse_el_with_attribute(
    tag: str | None, context: str | None, attributes: str | None
) -> dict:
    """Build an element rule from the tag, class/id and attribute parts of a token.

    Args:
        tag (str | None): The tag name; `*` is used when it is missing.
        context (str | None): The run of `.class` and `#id` parts, e.g. `.red#sample`.
        attributes (str | None): The run of `[...]` attribute selectors.

    Returns:
        dict: A rule with the keys `tag`, `classList`, `id` and `attributes`.

    Raises:
        ValueError: If `context` contains more than one `#id` part.
    """
    element = {
        "tag": tag or "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if attributes is not None:
        element["attributes"] = _parse_attribute_selectors(attributes)

    if context is not None:
        for marker, name in re.findall(r"(#|\.)([\w\-]+)", context):
            if marker == ".":
                # Repeated classes are recorded once.
                if name not in element["classList"]:
                    element["classList"].append(name)
            elif element["id"] is None:
                element["id"] = name
            else:
                raise ValueError(
                    f"There may only be one id per element specifier. '{(tag or '') + (context or '')}{attributes or ''}'",
                )
    return element


def __parse_attr_only_element(token: str) -> dict:
    """Build an element rule for a token made only of `[...]` attribute selectors.

    Args:
        token (str): The run of attribute selectors, e.g. `[type=checkbox][checked]`.

    Returns:
        dict: A rule for any tag (`*`) with the parsed attribute selectors.
    """
    element = {
        "tag": "*",
        "classList": [],
        "id": None,
        "attributes": [],
    }

    if token not in ["", None]:
        element["attributes"] = _parse_attribute_selectors(token)

    return element


def parse_specifiers(specifier: str) -> list:
    """Split a specifier into combinator strings and element rule dictionaries.

    Rules:
    * `*` = any element
    * `>` = direct child of the current element
    * `+` = first sibling
    * `~` = elements after the current element
    * `.` = class
    * `#` = id
    * `[attribute]` = elements with attribute
    * `[attribute=value]` = elements with attribute=value
    * `[attribute~=value]` = elements with attribute containing value
    * `[attribute|=value]` = elements with attribute=value or attribute starting with value-
    * `[attribute^=value]` = elements with an attribute starting with value
    * `[attribute$=value]` = elements with an attribute ending with value
    * `[attribute*=value]` = elements with an attribute containing value

    Args:
        specifier (str): The selector text.

    Returns:
        list: The tokens in order of appearance. `*`, `>`, `+` and `~` are kept as
        strings; every element is a dictionary with the keys `tag`, `classList`,
        `id` and `attributes`. Whitespace produces no token.
    """
    # Combinator characters are excluded from tag, class and id names so that
    # `div>p` tokenizes exactly like `div > p`.
    splitter = re.compile(
        r"([~>*+])"
        r"|((?:\[[^\[\]]+\])+)"
        r"|([^.#\[\]\s~>*+]+)?((?:[.#][^.#\[\]\s~>*+]+)+)?((?:\[[^\[\]]+\])+)?"
    )

    tokens = []
    for token in splitter.finditer(specifier):
        sibling, just_attributes, tag, context, attributes = token.groups()
        if sibling in ["*", ">", "+", "~"]:
            tokens.append(sibling)
        elif tag is not None or context is not None or attributes is not None:
            tokens.append(__parse_el_with_attribute(tag, context, attributes))
        elif just_attributes is not None:
            tokens.append(__parse_attr_only_element(just_attributes))
        # Anything else is an empty match (whitespace or end of input): no token.
    return tokens