initial commit
This commit is contained in:
@@ -0,0 +1,476 @@
|
||||
# Python Markdown
|
||||
|
||||
# A Python implementation of John Gruber's Markdown.
|
||||
|
||||
# Documentation: https://python-markdown.github.io/
|
||||
# GitHub: https://github.com/Python-Markdown/markdown/
|
||||
# PyPI: https://pypi.org/project/Markdown/
|
||||
|
||||
# Started by Manfred Stienstra (http://www.dwerg.net/).
|
||||
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
|
||||
# Currently maintained by Waylan Limberg (https://github.com/waylan),
|
||||
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
|
||||
|
||||
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
|
||||
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
|
||||
# Copyright 2004 Manfred Stienstra (the original version)
|
||||
|
||||
# License: BSD (see LICENSE.md for details).
|
||||
|
||||
"""
|
||||
Tree processors manipulate the tree created by block processors. They can even create an entirely
|
||||
new `ElementTree` object. This is an excellent place for creating summaries, adding collected
|
||||
references, or last minute adjustments.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import xml.etree.ElementTree as etree
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from . import util
|
||||
from . import inlinepatterns
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from markdown import Markdown
|
||||
|
||||
|
||||
def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
|
||||
""" Build the default `treeprocessors` for Markdown. """
|
||||
treeprocessors = util.Registry()
|
||||
treeprocessors.register(InlineProcessor(md), 'inline', 20)
|
||||
treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
|
||||
treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
|
||||
return treeprocessors
|
||||
|
||||
|
||||
def isString(s: object) -> bool:
|
||||
""" Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """
|
||||
if not isinstance(s, util.AtomicString):
|
||||
return isinstance(s, str)
|
||||
return False
|
||||
|
||||
|
||||
class Treeprocessor(util.Processor):
|
||||
"""
|
||||
`Treeprocessor`s are run on the `ElementTree` object before serialization.
|
||||
|
||||
Each `Treeprocessor` implements a `run` method that takes a pointer to an
|
||||
`Element` and modifies it as necessary.
|
||||
|
||||
`Treeprocessors` must extend `markdown.Treeprocessor`.
|
||||
|
||||
"""
|
||||
def run(self, root: etree.Element) -> etree.Element | None:
|
||||
"""
|
||||
Subclasses of `Treeprocessor` should implement a `run` method, which
|
||||
takes a root `Element`. This method can return another `Element`
|
||||
object, and the existing root `Element` will be replaced, or it can
|
||||
modify the current tree and return `None`.
|
||||
"""
|
||||
pass # pragma: no cover
|
||||
|
||||
|
||||
class InlineProcessor(Treeprocessor):
|
||||
"""
|
||||
A `Treeprocessor` that traverses a tree, applying inline patterns.
|
||||
"""
|
||||
|
||||
def __init__(self, md: Markdown):
|
||||
self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
|
||||
self.__placeholder_suffix = util.ETX
|
||||
self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
|
||||
+ len(self.__placeholder_suffix)
|
||||
self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
|
||||
self.md = md
|
||||
self.inlinePatterns = md.inlinePatterns
|
||||
self.ancestors: list[str] = []
|
||||
|
||||
def __makePlaceholder(self, type: str) -> tuple[str, str]:
|
||||
""" Generate a placeholder """
|
||||
id = "%04d" % len(self.stashed_nodes)
|
||||
hash = util.INLINE_PLACEHOLDER % id
|
||||
return hash, id
|
||||
|
||||
def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
|
||||
"""
|
||||
Extract id from data string, start from index.
|
||||
|
||||
Arguments:
|
||||
data: String.
|
||||
index: Index, from which we start search.
|
||||
|
||||
Returns:
|
||||
Placeholder id and string index, after the found placeholder.
|
||||
|
||||
"""
|
||||
m = self.__placeholder_re.search(data, index)
|
||||
if m:
|
||||
return m.group(1), m.end()
|
||||
else:
|
||||
return None, index + 1
|
||||
|
||||
def __stashNode(self, node: etree.Element | str, type: str) -> str:
|
||||
""" Add node to stash. """
|
||||
placeholder, id = self.__makePlaceholder(type)
|
||||
self.stashed_nodes[id] = node
|
||||
return placeholder
|
||||
|
||||
def __handleInline(self, data: str, patternIndex: int = 0) -> str:
|
||||
"""
|
||||
Process string with inline patterns and replace it with placeholders.
|
||||
|
||||
Arguments:
|
||||
data: A line of Markdown text.
|
||||
patternIndex: The index of the `inlinePattern` to start with.
|
||||
|
||||
Returns:
|
||||
String with placeholders.
|
||||
|
||||
"""
|
||||
if not isinstance(data, util.AtomicString):
|
||||
startIndex = 0
|
||||
count = len(self.inlinePatterns)
|
||||
while patternIndex < count:
|
||||
data, matched, startIndex = self.__applyPattern(
|
||||
self.inlinePatterns[patternIndex], data, patternIndex, startIndex
|
||||
)
|
||||
if not matched:
|
||||
patternIndex += 1
|
||||
return data
|
||||
|
||||
def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
|
||||
"""
|
||||
Process placeholders in `Element.text` or `Element.tail`
|
||||
of Elements popped from `self.stashed_nodes`.
|
||||
|
||||
Arguments:
|
||||
node: Parent node.
|
||||
subnode: Processing node.
|
||||
isText: Boolean variable, True - it's text, False - it's a tail.
|
||||
|
||||
"""
|
||||
if isText:
|
||||
text = subnode.text
|
||||
subnode.text = None
|
||||
else:
|
||||
text = subnode.tail
|
||||
subnode.tail = None
|
||||
|
||||
childResult = self.__processPlaceholders(text, subnode, isText)
|
||||
|
||||
if not isText and node is not subnode:
|
||||
pos = list(node).index(subnode) + 1
|
||||
else:
|
||||
pos = 0
|
||||
|
||||
childResult.reverse()
|
||||
for newChild in childResult:
|
||||
node.insert(pos, newChild[0])
|
||||
|
||||
def __processPlaceholders(
|
||||
self,
|
||||
data: str | None,
|
||||
parent: etree.Element,
|
||||
isText: bool = True
|
||||
) -> list[tuple[etree.Element, list[str]]]:
|
||||
"""
|
||||
Process string with placeholders and generate `ElementTree` tree.
|
||||
|
||||
Arguments:
|
||||
data: String with placeholders instead of `ElementTree` elements.
|
||||
parent: Element, which contains processing inline data.
|
||||
isText: Boolean variable, True - it's text, False - it's a tail.
|
||||
|
||||
Returns:
|
||||
List with `ElementTree` elements with applied inline patterns.
|
||||
|
||||
"""
|
||||
def linkText(text: str | None) -> None:
|
||||
if text:
|
||||
if result:
|
||||
if result[-1][0].tail:
|
||||
result[-1][0].tail += text
|
||||
else:
|
||||
result[-1][0].tail = text
|
||||
elif not isText:
|
||||
if parent.tail:
|
||||
parent.tail += text
|
||||
else:
|
||||
parent.tail = text
|
||||
else:
|
||||
if parent.text:
|
||||
parent.text += text
|
||||
else:
|
||||
parent.text = text
|
||||
result = []
|
||||
strartIndex = 0
|
||||
while data:
|
||||
index = data.find(self.__placeholder_prefix, strartIndex)
|
||||
if index != -1:
|
||||
id, phEndIndex = self.__findPlaceholder(data, index)
|
||||
|
||||
if id in self.stashed_nodes:
|
||||
node = self.stashed_nodes.get(id)
|
||||
|
||||
if index > 0:
|
||||
text = data[strartIndex:index]
|
||||
linkText(text)
|
||||
|
||||
if not isinstance(node, str): # it's Element
|
||||
for child in [node] + list(node):
|
||||
if child.tail:
|
||||
if child.tail.strip():
|
||||
self.__processElementText(
|
||||
node, child, False
|
||||
)
|
||||
if child.text:
|
||||
if child.text.strip():
|
||||
self.__processElementText(child, child)
|
||||
else: # it's just a string
|
||||
linkText(node)
|
||||
strartIndex = phEndIndex
|
||||
continue
|
||||
|
||||
strartIndex = phEndIndex
|
||||
result.append((node, self.ancestors[:]))
|
||||
|
||||
else: # wrong placeholder
|
||||
end = index + len(self.__placeholder_prefix)
|
||||
linkText(data[strartIndex:end])
|
||||
strartIndex = end
|
||||
else:
|
||||
text = data[strartIndex:]
|
||||
if isinstance(data, util.AtomicString):
|
||||
# We don't want to loose the `AtomicString`
|
||||
text = util.AtomicString(text)
|
||||
linkText(text)
|
||||
data = ""
|
||||
|
||||
return result
|
||||
|
||||
def __applyPattern(
|
||||
self,
|
||||
pattern: inlinepatterns.Pattern,
|
||||
data: str,
|
||||
patternIndex: int,
|
||||
startIndex: int = 0
|
||||
) -> tuple[str, bool, int]:
|
||||
"""
|
||||
Check if the line fits the pattern, create the necessary
|
||||
elements, add it to `stashed_nodes`.
|
||||
|
||||
Arguments:
|
||||
data: The text to be processed.
|
||||
pattern: The pattern to be checked.
|
||||
patternIndex: Index of current pattern.
|
||||
startIndex: String index, from which we start searching.
|
||||
|
||||
Returns:
|
||||
String with placeholders instead of `ElementTree` elements.
|
||||
|
||||
"""
|
||||
new_style = isinstance(pattern, inlinepatterns.InlineProcessor)
|
||||
|
||||
for exclude in pattern.ANCESTOR_EXCLUDES:
|
||||
if exclude.lower() in self.ancestors:
|
||||
return data, False, 0
|
||||
|
||||
if new_style:
|
||||
match = None
|
||||
# Since `handleMatch` may reject our first match,
|
||||
# we iterate over the buffer looking for matches
|
||||
# until we can't find any more.
|
||||
for match in pattern.getCompiledRegExp().finditer(data, startIndex):
|
||||
node, start, end = pattern.handleMatch(match, data)
|
||||
if start is None or end is None:
|
||||
startIndex += match.end(0)
|
||||
match = None
|
||||
continue
|
||||
break
|
||||
else: # pragma: no cover
|
||||
match = pattern.getCompiledRegExp().match(data[startIndex:])
|
||||
leftData = data[:startIndex]
|
||||
|
||||
if not match:
|
||||
return data, False, 0
|
||||
|
||||
if not new_style: # pragma: no cover
|
||||
node = pattern.handleMatch(match)
|
||||
start = match.start(0)
|
||||
end = match.end(0)
|
||||
|
||||
if node is None:
|
||||
return data, True, end
|
||||
|
||||
if not isinstance(node, str):
|
||||
if not isinstance(node.text, util.AtomicString):
|
||||
# We need to process current node too
|
||||
for child in [node] + list(node):
|
||||
if not isString(node):
|
||||
if child.text:
|
||||
self.ancestors.append(child.tag.lower())
|
||||
child.text = self.__handleInline(
|
||||
child.text, patternIndex + 1
|
||||
)
|
||||
self.ancestors.pop()
|
||||
if child.tail:
|
||||
child.tail = self.__handleInline(
|
||||
child.tail, patternIndex
|
||||
)
|
||||
|
||||
placeholder = self.__stashNode(node, pattern.type())
|
||||
|
||||
if new_style:
|
||||
return "{}{}{}".format(data[:start],
|
||||
placeholder, data[end:]), True, 0
|
||||
else: # pragma: no cover
|
||||
return "{}{}{}{}".format(leftData,
|
||||
match.group(1),
|
||||
placeholder, match.groups()[-1]), True, 0
|
||||
|
||||
def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
|
||||
"""Build the ancestor list."""
|
||||
ancestors = []
|
||||
while parent is not None:
|
||||
if parent is not None:
|
||||
ancestors.append(parent.tag.lower())
|
||||
parent = self.parent_map.get(parent)
|
||||
ancestors.reverse()
|
||||
parents.extend(ancestors)
|
||||
|
||||
def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
|
||||
"""Apply inline patterns to a parsed Markdown tree.
|
||||
|
||||
Iterate over `Element`, find elements with inline tag, apply inline
|
||||
patterns and append newly created Elements to tree. To avoid further
|
||||
processing of string with inline patterns, instead of normal string,
|
||||
use subclass [`AtomicString`][markdown.util.AtomicString]:
|
||||
|
||||
node.text = markdown.util.AtomicString("This will not be processed.")
|
||||
|
||||
Arguments:
|
||||
tree: `Element` object, representing Markdown tree.
|
||||
ancestors: List of parent tag names that precede the tree node (if needed).
|
||||
|
||||
Returns:
|
||||
An element tree object with applied inline patterns.
|
||||
|
||||
"""
|
||||
self.stashed_nodes: dict[str, etree.Element | str] = {}
|
||||
|
||||
# Ensure a valid parent list, but copy passed in lists
|
||||
# to ensure we don't have the user accidentally change it on us.
|
||||
tree_parents = [] if ancestors is None else ancestors[:]
|
||||
|
||||
self.parent_map = {c: p for p in tree.iter() for c in p}
|
||||
stack = [(tree, tree_parents)]
|
||||
|
||||
while stack:
|
||||
currElement, parents = stack.pop()
|
||||
|
||||
self.ancestors = parents
|
||||
self.__build_ancestors(currElement, self.ancestors)
|
||||
|
||||
insertQueue = []
|
||||
for child in currElement:
|
||||
if child.text and not isinstance(
|
||||
child.text, util.AtomicString
|
||||
):
|
||||
self.ancestors.append(child.tag.lower())
|
||||
text = child.text
|
||||
child.text = None
|
||||
lst = self.__processPlaceholders(
|
||||
self.__handleInline(text), child
|
||||
)
|
||||
for item in lst:
|
||||
self.parent_map[item[0]] = child
|
||||
stack += lst
|
||||
insertQueue.append((child, lst))
|
||||
self.ancestors.pop()
|
||||
if child.tail:
|
||||
tail = self.__handleInline(child.tail)
|
||||
dumby = etree.Element('d')
|
||||
child.tail = None
|
||||
tailResult = self.__processPlaceholders(tail, dumby, False)
|
||||
if dumby.tail:
|
||||
child.tail = dumby.tail
|
||||
pos = list(currElement).index(child) + 1
|
||||
tailResult.reverse()
|
||||
for newChild in tailResult:
|
||||
self.parent_map[newChild[0]] = currElement
|
||||
currElement.insert(pos, newChild[0])
|
||||
if len(child):
|
||||
self.parent_map[child] = currElement
|
||||
stack.append((child, self.ancestors[:]))
|
||||
|
||||
for element, lst in insertQueue:
|
||||
for i, obj in enumerate(lst):
|
||||
newChild = obj[0]
|
||||
element.insert(i, newChild)
|
||||
return tree
|
||||
|
||||
|
||||
class PrettifyTreeprocessor(Treeprocessor):
|
||||
""" Add line breaks to the html document. """
|
||||
|
||||
def _prettifyETree(self, elem: etree.Element) -> None:
|
||||
""" Recursively add line breaks to `ElementTree` children. """
|
||||
|
||||
i = "\n"
|
||||
if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
|
||||
if (not elem.text or not elem.text.strip()) \
|
||||
and len(elem) and self.md.is_block_level(elem[0].tag):
|
||||
elem.text = i
|
||||
for e in elem:
|
||||
if self.md.is_block_level(e.tag):
|
||||
self._prettifyETree(e)
|
||||
if not elem.tail or not elem.tail.strip():
|
||||
elem.tail = i
|
||||
|
||||
def run(self, root: etree.Element) -> None:
|
||||
""" Add line breaks to `Element` object and its children. """
|
||||
|
||||
self._prettifyETree(root)
|
||||
# Do `<br />`'s separately as they are often in the middle of
|
||||
# inline content and missed by `_prettifyETree`.
|
||||
brs = root.iter('br')
|
||||
for br in brs:
|
||||
if not br.tail or not br.tail.strip():
|
||||
br.tail = '\n'
|
||||
else:
|
||||
br.tail = '\n%s' % br.tail
|
||||
# Clean up extra empty lines at end of code blocks.
|
||||
pres = root.iter('pre')
|
||||
for pre in pres:
|
||||
if len(pre) and pre[0].tag == 'code':
|
||||
code = pre[0]
|
||||
# Only prettify code containing text only
|
||||
if not len(code) and code.text is not None:
|
||||
code.text = util.AtomicString(code.text.rstrip() + '\n')
|
||||
|
||||
|
||||
class UnescapeTreeprocessor(Treeprocessor):
|
||||
""" Restore escaped chars """
|
||||
|
||||
RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
|
||||
|
||||
def _unescape(self, m: re.Match[str]) -> str:
|
||||
return chr(int(m.group(1)))
|
||||
|
||||
def unescape(self, text: str) -> str:
|
||||
return self.RE.sub(self._unescape, text)
|
||||
|
||||
def run(self, root: etree.Element) -> None:
|
||||
""" Loop over all elements and unescape all text. """
|
||||
for elem in root.iter():
|
||||
# Unescape text content
|
||||
if elem.text and not elem.tag == 'code':
|
||||
elem.text = self.unescape(elem.text)
|
||||
# Unescape tail content
|
||||
if elem.tail:
|
||||
elem.tail = self.unescape(elem.tail)
|
||||
# Unescape attribute values
|
||||
for key, value in elem.items():
|
||||
elem.set(key, self.unescape(value))
|
||||
Reference in New Issue
Block a user