Coverage for src/midgy/render.py: 97%

147 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-01-02 16:08 -0800

1"""render builds the machinery to translate markdown documents to code.""" 

2 

3from dataclasses import dataclass, field 

4from functools import partial 

5from io import StringIO 

6from re import compile 

7 

8__all__ = () 

9 

10DOCTEST_CHAR, CONTINUATION_CHAR, COLON_CHAR, QUOTES_CHARS = 62, 92, 58, {39, 34} 

11BLOCK, FENCE, PYCON = "code_block", "fence", "pycon" 

12ESCAPE = {x: "\\" + x for x in "'\""} 

13ESCAPE_PATTERN = compile("[" + "".join(ESCAPE) + "]") 

14escape = partial(ESCAPE_PATTERN.sub, lambda m: ESCAPE.get(m.group(0))) 

15SP, QUOTES = chr(32), (chr(34) * 3, chr(39) * 3) 

16 

17 

# the Renderer is a special markdown renderer designed to produce
# line-for-line transformations of markdown to the converted code.
# not all languages require this, but for python it matters.

@dataclass
class Renderer:
    """the base render system for markdown to code.

    * tokenize & render markdown as code
    * line-for-line rendering
    * use indented code as fiducial markers for translation
    * augment the commonmark spec with shebang, doctest, code, and front_matter tokens
    * a reusable base class that underlies the python translation
    """

    # the markdown-it parser; replaced by a configured one in __post_init__
    parser: object = None
    # an "hr" token whose marker (ignoring spaces) is longer than this splits cells
    cell_hr_length: int = 9
    include_code: bool = True  # the nuclear option
    # fence info strings (languages) whose bodies are treated as code
    include_code_fences: set = field(default_factory=set)
    include_indented_code: bool = True
    include_doctest: bool = False
    # front-matter key whose mapping reconfigures this renderer (see renderer_from_tokens)
    config_key: str = "py"

    def __post_init__(self):
        # always build the customized markdown-it parser for this renderer
        self.parser = self.get_parser()

    @classmethod
    def code_from_string(cls, body, **kwargs):
        """render a string"""
        return cls(**kwargs).render(body)

    def get_block(self, env, stop=None):
        """iterate through the lines in a buffer

        yield the rest of the buffer when ``stop`` is None, otherwise yield
        lines until ``env["last_line"]`` reaches ``stop`` (a line number).
        """
        if stop is None:
            yield from env["source"]
        else:
            while env["last_line"] < stop:
                yield self.readline(env)

    def get_cells(self, tokens, *, env=None, include_hr=True):
        """walk cells separated by mega-hrs

        yield ``(block_tokens, separator_token)`` pairs; the trailing cell
        is paired with ``None``.
        """
        block = []
        for token in tokens:
            if token.type == "hr":
                # only an hr marker longer than cell_hr_length separates cells
                if (len(token.markup) - token.markup.count(" ")) > self.cell_hr_length:
                    yield (list(block), token)
                    block.clear()
                    if include_hr:
                        block.append(token)
                    elif env is not None:
                        # drain the separator's lines from the shared buffer so
                        # they are skipped in the output.
                        # NOTE(review): get_block compares last_line < stop,
                        # which expects an int line number, but a Token is
                        # passed here — confirm this path is exercised/intended.
                        list(self.get_block(env, token))
                # NOTE(review): an hr shorter than the separator length is not
                # re-appended to the block — confirm that is intended.
            else:
                block.append(token)
        if block:
            # the final cell has no separator token
            yield block, None

    def get_front_matter(self, tokens):
        """return the parsed front-matter data, or None when there is none."""
        for token in tokens:
            if token.type == "shebang":
                # a shebang may precede front matter; look past it
                continue
            if token.type == "front_matter":
                from .front_matter import load

                if "data" in token.meta:
                    # reuse data already parsed and cached on the token
                    return token.meta["data"]
                return token.meta.setdefault("data", load(token.content))
            # front matter only appears at the start of a document; stop looking
            return

    def get_initial_env(self, src, tokens):
        """initialize the parser environment indents"""
        env = dict(source=StringIO(src), last_line=0, last_indent=0)
        for token in filter(self.is_code_block, tokens):  # iterate through the tokens
            # 9999 is a sentinel larger than any realistic indentation
            env["min_indent"] = min(env.get("min_indent", 9999), token.meta["min_indent"])
        env.setdefault("min_indent", 0)
        return env

    def get_parser(self):
        """build a markdown-it parser with the midgy lexer extensions applied."""
        from markdown_it import MarkdownIt

        parser = MarkdownIt("gfm-like", options_update=dict(inline_definitions=True, langPrefix=""))
        return self.set_parser_defaults(parser)

    def get_updated_env(self, token, env, **kwargs):
        """update the state of the environment"""
        left = token.content.rstrip()
        env.update(
            continued=left.endswith("\\"),  # explicit line continuation
            colon_block=left.endswith(":"),  # a block (def/if/for/...) was opened
            quoted_block=left.endswith(QUOTES),  # endswith accepts the tuple of triple quotes
        )
        env.update(kwargs)

    def is_code_block(self, token):
        """is the token a code block entry"""
        if self.include_code:
            if token.type == BLOCK:
                if token.meta["is_doctest"]:
                    return self.include_doctest
                return self.include_indented_code
            elif token.type == FENCE:
                if token.info in self.include_code_fences:
                    return True
                if token.info == PYCON:
                    # pycon fences hold doctest sessions
                    return self.include_doctest
        return False

    def non_code(self, env, next=None):
        """yield buffered lines up to the next code token (or to the end)."""
        yield from self.get_block(env, next.map[0] if next else None)
        if next:
            env.update(last_indent=next.meta.get("last_indent", 0))

    def parse(self, src):
        """tokenize a markdown string."""
        return self.parser.parse(src)

    def parse_cells(self, body, *, include_hr=True):
        """yield the token block of each cell in a document."""
        yield from (x[0] for x in self.get_cells(self.parse(body), include_hr=include_hr))

    def print(self, iter, io):
        """write an iterable of strings to a stream with no separators."""
        return print(*iter, file=io, sep="", end="")

    def readline(self, env):
        """read one line from the buffer, tracking the current line number."""
        try:
            return env["source"].readline()
        finally:
            # increment even when readline raises, keeping the count in sync
            env["last_line"] += 1

    def render(self, src):
        """translate a markdown string into code."""
        return self.render_tokens(self.parse(src), src=src)

    def render_cells(self, src, *, include_hr=True):
        """render a document cell by cell, yielding one string per cell."""
        # cells allow different parsers in a single pass
        tokens = self.parse(src)
        self = self.renderer_from_tokens(tokens)
        prior = self.get_initial_env(src, tokens)
        prior_token = None
        # one buffer is shared across cells so line accounting stays continuous
        source = prior.pop("source")

        for block, next_token in self.get_cells(tokens, env=prior, include_hr=include_hr):
            env = self.get_initial_env(src, block)
            # resume the shared buffer at the line where the prior cell stopped
            env["source"], env["last_line"] = source, prior["last_line"]
            # carry the prior cell's separator token into this cell's block
            prior_token and block.insert(0, prior_token)
            yield self.render_tokens(block, env=env, stop=next_token)
            prior, prior_token = env, next_token

    def render_token(self, token, env):
        """dispatch a token to the method named after its type, if one exists."""
        if token:
            method = getattr(self, token.type, None)
            if method:
                yield from method(token, env) or ()

    def render_tokens(self, tokens, env=None, src=None, stop=None, target=None):
        """render parsed markdown tokens"""
        if target is None:
            target = StringIO()
        # front matter may reconfigure the renderer for this document
        self = self.renderer_from_tokens(tokens)
        if env is None:
            env = self.get_initial_env(src, tokens)
        for token in tokens:
            if self.is_code_block(token):
                # remember the upcoming code token for token renderers
                env["next_code"] = token
            self.print(self.render_token(token, env), target)
        # handle anything left in the buffer
        self.print(self.non_code(env, stop), target)
        return target.getvalue()  # return the value of the target, a format string.

    def renderer_from_tokens(self, tokens):
        """return a renderer reconfigured by the document's front matter, or self."""
        front_matter = self.get_front_matter(tokens)
        if front_matter:
            # front matter can reconfigure the parser and make a new one
            config = {k: getattr(self, k) for k in self.__dataclass_fields__}
            config.update(front_matter.get(self.config_key, {}))
            if config:
                return type(self)(**config)
        return self

    def set_parser_defaults(self, parser):
        # our tangling system adds extra conventions to commonmark:
        ## extend indented code to recognize doctest syntax in-line
        ## replace the indented code lexer to recognize doctests and append metadata.
        ## recognize shebang lines at the beginning of a document.
        ## recognize front-matter at the beginning of document of following shebangs
        from mdit_py_plugins import deflist, footnote
        from .front_matter import _front_matter_lexer, _shebang_lexer
        from .lexers import code_fence_lexer, doctest_lexer, code_lexer

        parser.block.ruler.before("code", "doctest", doctest_lexer)
        parser.block.ruler.disable("code")
        # our indented code captures doctests in indented blocks
        parser.block.ruler.after("doctest", "code", code_lexer)
        parser.disable(FENCE)
        # our code fence captures indent information
        parser.block.ruler.after("code", FENCE, code_fence_lexer)
        # shebang because this markdown is code
        parser.block.ruler.before("table", "shebang", _shebang_lexer)
        parser.block.ruler.before("table", "front_matter", _front_matter_lexer)
        parser.use(footnote.footnote_plugin).use(deflist.deflist_plugin)
        parser.disable("footnote_tail")
        return parser