Coverage for phml\utilities\transform\sanitize\schema.py: 100%

16 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-04-06 14:50 -0500

1"""Defines the schema on how to sanitize the phml ast.""" 

2from __future__ import annotations 

3from dataclasses import dataclass, field 

4 

5 

6def _extend_dict_dict_( 

7 origin: dict[str, dict], new: dict[str, dict] 

8) -> dict[str, dict]: 

9 for key, value in new.items(): 

10 if key not in origin: 

11 origin[key] = value 

12 else: 

13 origin[key].update(value) 

14 

15 return origin 

16 

17 

18def _extend_dict_list_( 

19 origin: dict[str, list], new: dict[str, list] 

20) -> dict[str, list]: 

21 for key, value in new.items(): 

22 if key not in origin: 

23 origin[key] = value 

24 else: 

25 origin[key].extend([item for item in value if item not in origin[key]]) 

26 

27 return origin 

28 

29 

@dataclass
class Schema:
    """Dataclass of information on how to sanitize a phml tree.

    `strip (list[str])`: The elements to strip from the tree.
    `ancestors (dict[str, list[str]])`: Key is an element tag and the value is a list of valid
    parent elements.
    `protocols (dict[str, list[str]])`: Collection of names and their allowed protocol value list.
    The defaults are keyed by property names such as `href` and `src`.
    `tag_names (list[str])`: List of allowed tag names.
    `attributes (dict[str, list[str | tuple[str | bool, ...]]])`: Collection of element name and
    allowed property names. The `*` key holds the list used for every element (presumably a
    wildcard; confirm against the sanitizer). An entry is either a property name or a tuple
    starting with the property name, presumably followed by its allowed values.
    `required (dict[str, dict[str, str | bool]])`: Collection of element names and their required
    properties and required property values.
    """

    strip: list[str] = field(default_factory=lambda: ["script"])
    ancestors: dict[str, list] = field(
        default_factory=lambda: {
            "tbody": ["table"],
            "tfoot": ["table"],
            "thead": ["table"],
            "td": ["table"],
            "th": ["table"],
            "tr": ["table"],
        },
    )
    protocols: dict[str, list] = field(
        default_factory=lambda: {
            "href": ["http", "https", "mailto", "xmpp", "irc", "ircs"],
            "cite": ["http", "https"],
            "src": ["http", "https"],
            "longDesc": ["http", "https"],
        },
    )
    tag_names: list[str] = field(
        default_factory=lambda: [
            "h1",
            "h2",
            "h3",
            "h4",
            "h5",
            "h6",
            "br",
            "b",
            "i",
            "strong",
            "em",
            "a",
            "pre",
            "code",
            "img",
            "tt",
            "div",
            "ins",
            "del",
            "sup",
            "sub",
            "p",
            "ol",
            "ul",
            "table",
            "thead",
            "tbody",
            "tfoot",
            "blockquote",
            "dl",
            "dt",
            "dd",
            "kbd",
            "q",
            "samp",
            "var",
            "hr",
            "ruby",
            "rt",
            "rp",
            "li",
            "tr",
            "td",
            "th",
            "s",
            "strike",
            "summary",
            "details",
            "caption",
            "figure",
            "figcaption",
            "abbr",
            "bdo",
            "cite",
            "dfn",
            "mark",
            "small",
            "span",
            "time",
            "wbr",
            "input",
        ],
    )
    attributes: dict[str, list[str | tuple[str | bool, ...]]] = field(
        default_factory=lambda: {
            "a": ["href"],
            "img": ["src", "longDesc"],
            "input": [("type", "checkbox"), ("disabled", True)],
            "li": [("class", "task-list-item")],
            "div": ["itemScope", "itemType"],
            "blockquote": ["cite"],
            "del": ["cite"],
            "ins": ["cite"],
            "q": ["cite"],
            "*": [
                "abbr",
                "accept",
                "acceptCharset",
                "accessKey",
                "action",
                "align",
                "alt",
                "ariaDescribedBy",
                "ariaHidden",
                "ariaLabel",
                "ariaLabelledBy",
                "axis",
                "border",
                "cellPadding",
                "cellSpacing",
                "char",
                "charOff",
                "charSet",
                "checked",
                "clear",
                "cols",
                "colSpan",
                "color",
                "compact",
                "coords",
                "dateTime",
                "dir",
                "disabled",
                "encType",
                "htmlFor",
                "frame",
                "headers",
                "height",
                "hrefLang",
                "hSpace",
                "isMap",
                "id",
                "label",
                "lang",
                "maxLength",
                "media",
                "method",
                "multiple",
                "name",
                "noHref",
                "noShade",
                "noWrap",
                "open",
                "prompt",
                "readOnly",
                "rel",
                "rev",
                "rows",
                "rowSpan",
                "rules",
                "scope",
                "selected",
                "shape",
                "size",
                "span",
                "start",
                "summary",
                "tabIndex",
                "target",
                "title",
                "type",
                "useMap",
                "vAlign",
                "value",
                "vSpace",
                "width",
                "itemProp",
            ],
        },
    )
    required: dict[str, dict[str, str | bool]] = field(
        default_factory=lambda: {
            "input": {
                "type": "checkbox",
                "disabled": True,
            },
        },
    )

    def extend(
        self,
        strip: list[str] | None = None,
        ancestors: dict[str, list[str]] | None = None,
        protocols: dict[str, list[str]] | None = None,
        tag_names: list[str] | None = None,
        attributes: dict[str, list[str | tuple[str | bool, ...]]] | None = None,
        required: dict[str, dict[str, str | bool]] | None = None,
    ) -> Schema:
        """Build a new schema from this one plus the given additions.

        This schema is left unchanged: its nested lists and dicts are copied
        before the additions are merged in. `strip` and `tag_names` are
        de-duplicated while keeping their order (existing values first, then
        new ones).

        Args:
            `strip (list[str])`: The elements to strip from the tree.
            `ancestors (dict[str, list[str]])`: Key is an element tag and the value is a list of
                valid parent elements.
            `protocols (dict[str, list[str]])`: Collection of names to list of valid protocols
                (prefixes).
            `tag_names (list[str])`: List of allowed tag names.
            `attributes (dict[str, list[str | tuple[str | bool, ...]]])`: Collection of element
                name and allowed property names.
            `required (dict[str, dict[str, str | bool]])`: Collection of element names and their
                required properties and required property values.

        Returns:
            A new `Schema` holding the merged values.
        """

        # The merge helpers work on the mapping they are given, so hand them
        # copies that go one level deep; a plain `{**mapping}` would still
        # share the inner lists/dicts with `self`.
        return Schema(
            # `dict.fromkeys` drops duplicates and keeps first-seen order,
            # giving a stable result across runs (a `set` does not).
            strip=list(dict.fromkeys([*self.strip, *(strip or [])])),
            ancestors=_extend_dict_list_(
                {key: list(value) for key, value in self.ancestors.items()},
                ancestors or {},
            ),
            protocols=_extend_dict_list_(
                {key: list(value) for key, value in self.protocols.items()},
                protocols or {},
            ),
            attributes=_extend_dict_list_(
                {key: list(value) for key, value in self.attributes.items()},
                attributes or {},
            ),
            tag_names=list(dict.fromkeys([*self.tag_names, *(tag_names or [])])),
            required=_extend_dict_dict_(
                {key: dict(value) for key, value in self.required.items()},
                required or {},
            ),
        )

253 )