Coverage for src/midgy/lexers.py: 95%

74 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2022-12-30 11:10 -0800

1from re import compile 

2 

# token type / info names used by the lexers in this module
BLOCK, FENCE, PYCON = "code_block", "fence", "pycon"

# character-code tuples compared against slices of `state.srcCharCode`
DOCTEST_CHARS = 62, 62, 62, 32  # ">>> "
ELLIPSIS_CHARS = (ord("."),) * 3 + (32,)  # "... "
MAGIC_CHARS = (37, 37)  # "%%"

# a cell magic: optional leading whitespace, "%%", then at least one
# non-space character. written as a raw string so that `\s` and `\S` reach
# the regex engine instead of being treated as (invalid) string escapes.
MAGIC = compile(r"^\s*%{2}\S+")

8 

9 

def code_lexer(state, start, end, silent=False):
    """lex an indented code block and record its indentation on the token.

    lines are consumed from `start` while they are blank or indented at least
    four columns past `state.blkIndent`. unless the block opened with a `%%`
    magic, a line beginning with the doctest prompt ends the block.

    returns True after pushing a `code_block` token whose `meta` holds
    `first_indent`, `last_indent`, `min_indent`, `is_magic` and `is_doctest`;
    returns False when the line at `start` is not indented enough.
    `silent` is accepted but not consulted.
    """
    if state.sCount[start] - state.blkIndent < 4:
        return False

    is_magic = None  # decided once, from the first non-blank line
    min_indent = 9999
    first_indent = last_indent = 0
    last_line = line = start

    while line < end:
        if state.isEmpty(line):
            # blank lines never end the block, nor do they extend `last_line`
            line += 1
            continue

        indent = state.sCount[line]
        if indent - state.blkIndent < 4:
            break

        begin = state.bMarks[line] + state.tShift[line]
        if is_magic is None:
            is_magic = state.srcCharCode[begin : begin + 2] == MAGIC_CHARS
        if not is_magic and state.srcCharCode[begin : begin + 4] == DOCTEST_CHARS:
            break

        if not first_indent:
            first_indent = indent
        min_indent = min(min_indent, indent)
        last_indent, last_line = indent, line
        line += 1

    # the block stops just after the last non-blank line that was accepted
    state.line = last_line + 1
    token = state.push(BLOCK, "code", 0)
    token.content = state.getLines(start, state.line, 4 + state.blkIndent, True)
    token.map = [start, state.line]
    token.meta.update(
        first_indent=first_indent,
        last_indent=last_indent,
        min_indent=min_indent,
        is_magic=is_magic,
        is_doctest=False,
    )
    return True

45 

46 

def doctest_lexer(state, startLine, end, silent=False):
    """a markdown-it-py block rule for doctests.

    doctests are a literate programming convention in python that is part
    of the pidgy grammar. lexing them separately avoids mixing python and
    doctest code together.

    the doctest blocks:
    * extend the indented code blocks
    * do not conflict with blockquotes
    * are implicit code fences with the `pycon` info
    * can be replaced with explicit code blocks.

    returns True after pushing a `code_block` token marked `is_doctest=True`
    whose `meta` also holds the `input` and `output` line ranges; returns
    False when the line is not indented at least four columns past
    `state.blkIndent` or does not begin with the doctest prompt.
    `silent` is accepted but not consulted.
    """
    start = state.bMarks[startLine] + state.tShift[startLine]

    if state.sCount[startLine] - state.blkIndent < 4:
        return False
    if state.srcCharCode[start : start + 4] != DOCTEST_CHARS:
        return False

    indent = state.sCount[startLine]
    input_end = output_end = line = startLine + 1
    in_output = False

    while line < end:
        # a blank line, a dedent, or a fresh prompt all end this doctest
        if state.isEmpty(line) or state.sCount[line] < indent:
            break
        begin = state.bMarks[line] + state.tShift[line]
        prefix = state.srcCharCode[begin : begin + 4]
        if prefix == DOCTEST_CHARS:
            break

        line += 1
        if not in_output and prefix == ELLIPSIS_CHARS:
            # continuation lines extend the input until output has begun
            input_end = line
        else:
            in_output = True
            output_end = line

    state.line = line
    token = state.push(BLOCK, "code", 0)
    token.content = state.getLines(startLine, line, 0, True)
    token.map = [startLine, state.line]

    # drop the leading whitespace and prompt before testing for a magic
    after_prompt = token.content.lstrip().lstrip(">").lstrip()
    token.meta.update(
        first_indent=indent,
        last_indent=indent,
        min_indent=indent,
        is_magic=bool(MAGIC.match(after_prompt)),
        is_doctest=True,
        input=[startLine, input_end],
        output=[input_end, output_end] if input_end < output_end else None,
    )
    return True

97 

98 

def code_fence_lexer(state, *args, **kwargs):
    """run the markdown-it-py fence rule, then add indent metadata to its token.

    all arguments are forwarded to `fence` and its result is returned
    unchanged. when the rule pushed a token, that token's `meta` gains
    `first_indent`, `last_indent`, `min_indent`, `is_magic_info`, `is_magic`
    and `is_doctest`.
    """
    from markdown_it.rules_block.fence import fence

    pushed_before = len(state.tokens)
    result = fence(state, *args, **kwargs)

    # the fence rule can report a match without pushing a token (its silent
    # validation mode). in that case `state.tokens[-1]` belongs to some other
    # rule, or does not exist, so there is nothing to annotate.
    if not result or len(state.tokens) <= pushed_before:
        return result

    token = state.tokens[-1]
    # lines strictly between the opening line and the last mapped line.
    # NOTE(review): this treats the last mapped line as the closing fence; for
    # a fence that is never closed the final content line may be left out.
    body = range(token.map[0] + 1, token.map[1] - 1)
    first_indent = state.sCount[body[0]] if body else 0
    last_indent = state.sCount[body[-1]] if body else 0
    min_indent = min((state.sCount[i] for i in body if not state.isEmpty(i)), default=0)

    token.meta.update(
        first_indent=first_indent,
        last_indent=last_indent,
        min_indent=min_indent,
        is_magic_info=bool(MAGIC.match(token.info)),
        is_magic=bool(MAGIC.match(token.content)),
        is_doctest=token.info == PYCON,
    )
    return result