phml.utilities.transform.extract
from phml.core.nodes import AST, NODE, Comment, Element, Root, Text

__all__ = ["to_string"]


def to_string(node: AST | NODE) -> str | None:
    """Get the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter, except that the
    text pieces gathered from a subtree are joined with a single space.

    Args:
        node (AST | Root | Element | Text | Comment): Node to get the text
            content from. An ``AST`` is unwrapped to its ``tree`` first.

    Returns:
        str | None: The node's own ``value`` for ``Text`` and ``Comment``
        nodes; the space-joined values of every descendant ``Text`` node
        (depth-first, document order) for ``Root`` and ``Element`` nodes;
        ``None`` for any other node type.
    """
    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    if not isinstance(node, (Root, Element)):
        # Neither a text-bearing node nor a container: nothing to extract.
        return None

    def iter_text(parent: Element | Root):
        """Yield the value of each descendant ``Text`` node of *parent*."""
        for child in parent.children:
            if isinstance(child, (Element, Root)):
                yield from iter_text(child)
            elif isinstance(child, Text):
                # Only Text contributes; nested comments are skipped.
                yield child.value

    return " ".join(iter_text(node))
def
to_string( node: phml.core.nodes.AST.AST | phml.core.nodes.nodes.Root | phml.core.nodes.nodes.Element | phml.core.nodes.nodes.Text | phml.core.nodes.nodes.Comment | phml.core.nodes.nodes.DocType | phml.core.nodes.nodes.Parent | phml.core.nodes.nodes.Node | phml.core.nodes.nodes.Literal) -> str:
def to_string(node: AST | NODE) -> str | None:
    """Get the raw text content of a node.

    Works similarly to the DOM's ``Node#textContent`` getter, except that the
    text pieces gathered from a subtree are joined with a single space.

    Args:
        node (AST | Root | Element | Text | Comment): Node to get the text
            content from. An ``AST`` is unwrapped to its ``tree`` first.

    Returns:
        str | None: The node's own ``value`` for ``Text`` and ``Comment``
        nodes; the space-joined values of every descendant ``Text`` node
        (depth-first, document order) for ``Root`` and ``Element`` nodes;
        ``None`` for any other node type.
    """
    if isinstance(node, AST):
        node = node.tree

    if isinstance(node, (Text, Comment)):
        return node.value

    if not isinstance(node, (Root, Element)):
        # Neither a text-bearing node nor a container: nothing to extract.
        return None

    def iter_text(parent: Element | Root):
        """Yield the value of each descendant ``Text`` node of *parent*."""
        for child in parent.children:
            if isinstance(child, (Element, Root)):
                yield from iter_text(child)
            elif isinstance(child, Text):
                # Only Text contributes; nested comments are skipped.
                yield child.value

    return " ".join(iter_text(node))
Get the raw text content of the element. Works similarly to the DOM's Node#textContent getter.
Args
- node (AST | Root | Element | Text | Comment): Node to get the text content from
Returns
str | None: Raw inner text without formatting; None if the node is not a Root, Element, Text, or Comment.