initial commit

2024-09-29 01:45:31 -04:00
commit 242841c44b
8018 changed files with 1426958 additions and 0 deletions
--- a/convertions-env/lib/python3.11/site-packages/markdown/treeprocessors.py
+++ b/convertions-env/lib/python3.11/site-packages/markdown/treeprocessors.py
@@ -0,0 +1,476 @@
+# Python Markdown
+
+# A Python implementation of John Gruber's Markdown.
+
+# Documentation: https://python-markdown.github.io/
+# GitHub: https://github.com/Python-Markdown/markdown/
+# PyPI: https://pypi.org/project/Markdown/
+
+# Started by Manfred Stienstra (http://www.dwerg.net/).
+# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
+# Currently maintained by Waylan Limberg (https://github.com/waylan),
+# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
+
+# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
+# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+# Copyright 2004 Manfred Stienstra (the original version)
+
+# License: BSD (see LICENSE.md for details).
+
+"""
+Tree processors manipulate the tree created by block processors. They can even create an entirely
+new `ElementTree` object. This is an excellent place for creating summaries, adding collected
+references, or last minute adjustments.
+
+"""
+
+from __future__ import annotations
+
+import re
+import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING, Any
+from . import util
+from . import inlinepatterns
+
+if TYPE_CHECKING:  # pragma: no cover
+    from markdown import Markdown
+
+
+def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
+    """
+    Build the default `treeprocessors` registry for Markdown.
+
+    Arguments:
+        md: The `Markdown` instance every default processor is constructed with.
+        **kwargs: Accepted but not used by this function.
+
+    Returns:
+        A `util.Registry` holding the `inline` (priority 20), `prettify` (priority 10)
+        and `unescape` (priority 0) tree processors.
+
+    """
+    # (processor class, registry name, priority), in registration order.
+    defaults = (
+        (InlineProcessor, 'inline', 20),
+        (PrettifyTreeprocessor, 'prettify', 10),
+        (UnescapeTreeprocessor, 'unescape', 0),
+    )
+    registry = util.Registry()
+    for processor_class, name, priority in defaults:
+        registry.register(processor_class(md), name, priority)
+    return registry
+
+
+def isString(s: object) -> bool:
+    """
+    Tell whether an object is an ordinary string.
+
+    Returns `True` for `str` instances that are not
+    [`AtomicString`][markdown.util.AtomicString] instances, `False` for everything else.
+    """
+    return isinstance(s, str) and not isinstance(s, util.AtomicString)
+
+
+class Treeprocessor(util.Processor):
+    """
+    Base class for processors that are run on the `ElementTree` object before serialization.
+
+    A `Treeprocessor` provides a `run` method which receives an `Element` and
+    changes it as required.
+
+    `Treeprocessors` must extend `markdown.Treeprocessor`.
+
+    """
+    def run(self, root: etree.Element) -> etree.Element | None:
+        """
+        Process the tree rooted at `root`; subclasses are expected to override this.
+
+        An override may either return a new `Element`, in which case the
+        existing root `Element` will be replaced, or change the given tree
+        in place and return `None`. This base implementation does nothing
+        and returns `None`.
+        """
+        return None  # pragma: no cover
+
+
+class InlineProcessor(Treeprocessor):
+    """
+    A `Treeprocessor` that traverses a tree, applying inline patterns.
+    """
+
+    def __init__(self, md: Markdown):
+        """
+        Cache the placeholder settings from `util` and keep a reference to `md`.
+
+        Arguments:
+            md: The `Markdown` instance; its `inlinePatterns` are the patterns
+                this processor applies.
+
+        """
+        prefix = util.INLINE_PLACEHOLDER_PREFIX
+        suffix = util.ETX
+        self.__placeholder_prefix = prefix
+        self.__placeholder_suffix = suffix
+        # Prefix and suffix plus four more characters (presumably the
+        # four-digit id produced by `__makePlaceholder`).
+        self.__placeholder_length = 4 + len(prefix) + len(suffix)
+        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
+        self.md = md
+        self.inlinePatterns = md.inlinePatterns
+        # Lower-cased tag names of the elements enclosing the text being processed.
+        self.ancestors: list[str] = []
+
+    def __makePlaceholder(self, type: str) -> tuple[str, str]:
+        """
+        Generate a placeholder for the next node to be stashed.
+
+        Arguments:
+            type: Not used by this method.
+
+        Returns:
+            A tuple of the placeholder string and the id embedded in it. The id
+            is the current number of stashed nodes, zero-padded to four digits.
+
+        """
+        node_id = "%04d" % len(self.stashed_nodes)
+        return util.INLINE_PLACEHOLDER % node_id, node_id
+
+    def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
+        """
+        Look for the next placeholder in `data`, searching from `index` onwards.
+
+        Arguments:
+            data: String to search.
+            index: Position at which the search starts.
+
+        Returns:
+            The placeholder id and the string index just after the placeholder
+            when one is found; otherwise `None` and `index + 1`.
+
+        """
+        found = self.__placeholder_re.search(data, index)
+        if found is None:
+            return None, index + 1
+        return found.group(1), found.end()
+
+    def __stashNode(self, node: etree.Element | str, type: str) -> str:
+        """
+        Store `node` in `self.stashed_nodes` under a freshly generated id.
+
+        Arguments:
+            node: The element or string to stash.
+            type: Passed through to `__makePlaceholder`.
+
+        Returns:
+            The placeholder string that stands in for `node`.
+
+        """
+        stand_in, node_id = self.__makePlaceholder(type)
+        self.stashed_nodes[node_id] = node
+        return stand_in
+
+    def __handleInline(self, data: str, patternIndex: int = 0) -> str:
+        """
+        Run the inline patterns over a string, swapping matches for placeholders.
+
+        An `AtomicString` is returned untouched.
+
+        Arguments:
+            data: A line of Markdown text.
+            patternIndex: The index of the `inlinePattern` to start with.
+
+        Returns:
+            String with placeholders.
+
+        """
+        if isinstance(data, util.AtomicString):
+            return data
+
+        startIndex = 0
+        total = len(self.inlinePatterns)
+        while patternIndex < total:
+            current = self.inlinePatterns[patternIndex]
+            data, matched, startIndex = self.__applyPattern(
+                current, data, patternIndex, startIndex
+            )
+            # Keep retrying the same pattern until it reports no match.
+            if not matched:
+                patternIndex += 1
+        return data
+
+    def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
+        """
+        Expand the placeholders found in `Element.text` or `Element.tail`
+        of Elements popped from `self.stashed_nodes`.
+
+        The text (or tail) is removed from `subnode`, run through
+        `__processPlaceholders`, and the resulting elements are inserted
+        into `node`.
+
+        Arguments:
+            node: Parent node.
+            subnode: Processing node.
+            isText: Boolean variable, True - it's text, False - it's a tail.
+
+        """
+        if isText:
+            text, subnode.text = subnode.text, None
+        else:
+            text, subnode.tail = subnode.tail, None
+
+        children = self.__processPlaceholders(text, subnode, isText)
+
+        # Elements produced from a tail go right after `subnode` inside `node`;
+        # in every other case they go to the front of `node`.
+        insert_at = 0
+        if not isText and node is not subnode:
+            insert_at = list(node).index(subnode) + 1
+
+        # Inserting in reverse at a fixed position keeps the original order.
+        for new_child, _ in reversed(children):
+            node.insert(insert_at, new_child)
+
+    def __processPlaceholders(
+        self,
+        data: str | None,
+        parent: etree.Element,
+        isText: bool = True
+    ) -> list[tuple[etree.Element, list[str]]]:
+        """
+        Process string with placeholders and generate `ElementTree` tree.
+
+        Plain text found between placeholders is not returned. The nested
+        `linkText` helper attaches it to the tail of the most recently
+        collected element or, while nothing has been collected yet, to
+        `parent.text` (`isText` true) or `parent.tail` (`isText` false).
+
+        Arguments:
+            data: String with placeholders instead of `ElementTree` elements.
+            parent: Element, which contains processing inline data.
+            isText: Boolean variable, True - it's text, False - it's a tail.
+
+        Returns:
+            List of `(element, ancestors)` tuples, one per stashed element
+            referenced in `data`, in order of appearance. `ancestors` is a
+            copy of `self.ancestors` taken when the element was collected.
+
+        """
+        # Append `text` to wherever the output currently ends (see docstring).
+        def linkText(text: str | None) -> None:
+            if text:
+                if result:
+                    if result[-1][0].tail:
+                        result[-1][0].tail += text
+                    else:
+                        result[-1][0].tail = text
+                elif not isText:
+                    if parent.tail:
+                        parent.tail += text
+                    else:
+                        parent.tail = text
+                else:
+                    if parent.text:
+                        parent.text += text
+                    else:
+                        parent.text = text
+        result = []
+        # Position in `data` up to which everything has already been consumed.
+        strartIndex = 0
+        while data:
+            index = data.find(self.__placeholder_prefix, strartIndex)
+            if index != -1:
+                id, phEndIndex = self.__findPlaceholder(data, index)
+
+                if id in self.stashed_nodes:
+                    node = self.stashed_nodes.get(id)
+
+                    # Emit the plain text that precedes the placeholder.
+                    if index > 0:
+                        text = data[strartIndex:index]
+                        linkText(text)
+
+                    if not isinstance(node, str):  # it's Element
+                        # The stashed element and its direct children may carry
+                        # placeholders of their own in non-blank text or tail.
+                        for child in [node] + list(node):
+                            if child.tail:
+                                if child.tail.strip():
+                                    self.__processElementText(
+                                        node, child, False
+                                    )
+                            if child.text:
+                                if child.text.strip():
+                                    self.__processElementText(child, child)
+                    else:  # it's just a string
+                        linkText(node)
+                        strartIndex = phEndIndex
+                        continue
+
+                    strartIndex = phEndIndex
+                    result.append((node, self.ancestors[:]))
+
+                else:  # wrong placeholder
+                    # Treat the prefix as ordinary text and resume right after it.
+                    end = index + len(self.__placeholder_prefix)
+                    linkText(data[strartIndex:end])
+                    strartIndex = end
+            else:
+                # No placeholder left: emit the remainder and end the loop.
+                text = data[strartIndex:]
+                if isinstance(data, util.AtomicString):
+                    # We don't want to lose the `AtomicString`
+                    text = util.AtomicString(text)
+                linkText(text)
+                data = ""
+
+        return result
+
+    def __applyPattern(
+        self,
+        pattern: inlinepatterns.Pattern,
+        data: str,
+        patternIndex: int,
+        startIndex: int = 0
+    ) -> tuple[str, bool, int]:
+        """
+        Check if the line fits the pattern, create the necessary
+        elements, add it to `stashed_nodes`.
+
+        Arguments:
+            data: The text to be processed.
+            pattern: The pattern to be checked.
+            patternIndex: Index of current pattern.
+            startIndex: String index, from which we start searching.
+
+        Returns:
+            A tuple `(data, matched, startIndex)`:
+
+            * `(data, False, 0)` with `data` unchanged when one of the pattern's
+              `ANCESTOR_EXCLUDES` is in `self.ancestors` or nothing matched.
+            * `(data, True, end)` with `data` unchanged when the pattern matched
+              but `handleMatch` returned `None` as the node.
+            * `(new_data, True, 0)` otherwise, where `new_data` has the matched
+              span replaced by a placeholder for the stashed node.
+
+        """
+        # Patterns derived from `inlinepatterns.InlineProcessor` report the span
+        # they consumed; the other (legacy) patterns only receive the match.
+        new_style = isinstance(pattern, inlinepatterns.InlineProcessor)
+
+        for exclude in pattern.ANCESTOR_EXCLUDES:
+            if exclude.lower() in self.ancestors:
+                return data, False, 0
+
+        if new_style:
+            match = None
+            # Since `handleMatch` may reject our first match,
+            # we iterate over the buffer looking for matches
+            # until we can't find any more.
+            for match in pattern.getCompiledRegExp().finditer(data, startIndex):
+                node, start, end = pattern.handleMatch(match, data)
+                if start is None or end is None:
+                    startIndex += match.end(0)
+                    match = None
+                    continue
+                break
+        else:  # pragma: no cover
+            # Legacy patterns are matched against the not yet processed part only.
+            match = pattern.getCompiledRegExp().match(data[startIndex:])
+            leftData = data[:startIndex]
+
+        if not match:
+            return data, False, 0
+
+        if not new_style:  # pragma: no cover
+            node = pattern.handleMatch(match)
+            start = match.start(0)
+            end = match.end(0)
+
+        if node is None:
+            return data, True, end
+
+        if not isinstance(node, str):
+            if not isinstance(node.text, util.AtomicString):
+                # We need to process current node too
+                for child in [node] + list(node):
+                    # NOTE(review): this tests `node`, not `child`; `node` is
+                    # known not to be a `str` here, so the check always passes.
+                    if not isString(node):
+                        if child.text:
+                            # Text inside `child` only sees the patterns after the
+                            # current one, with `child` recorded as an ancestor.
+                            self.ancestors.append(child.tag.lower())
+                            child.text = self.__handleInline(
+                                child.text, patternIndex + 1
+                            )
+                            self.ancestors.pop()
+                        if child.tail:
+                            # The tail is handled starting from the current pattern.
+                            child.tail = self.__handleInline(
+                                child.tail, patternIndex
+                            )
+
+        placeholder = self.__stashNode(node, pattern.type())
+
+        if new_style:
+            return "{}{}{}".format(data[:start],
+                                   placeholder, data[end:]), True, 0
+        else:  # pragma: no cover
+            # Rebuilt from the first and last regex groups; presumably legacy
+            # patterns capture the text before and after the construct there.
+            return "{}{}{}{}".format(leftData,
+                                     match.group(1),
+                                     placeholder, match.groups()[-1]), True, 0
+
+    def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
+        """
+        Extend `parents` with the tag names leading down to `parent`.
+
+        Starting at `parent`, `self.parent_map` is followed upwards until an
+        element without a recorded parent is reached. The lower-cased tags
+        are appended to `parents` outermost first, ending with `parent` itself.
+        """
+        chain = []
+        node = parent
+        while node is not None:
+            chain.append(node.tag.lower())
+            node = self.parent_map.get(node)
+        parents.extend(reversed(chain))
+
+    def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
+        """Apply inline patterns to a parsed Markdown tree.
+
+        Iterate over `Element`, find elements with inline tag, apply inline
+        patterns and append newly created Elements to tree.  To avoid further
+        processing of string with inline patterns, instead of normal string,
+        use subclass [`AtomicString`][markdown.util.AtomicString]:
+
+            node.text = markdown.util.AtomicString("This will not be processed.")
+
+        Arguments:
+            tree: `Element` object, representing Markdown tree.
+            ancestors: List of parent tag names that precede the tree node (if needed).
+
+        Returns:
+            An element tree object with applied inline patterns.
+
+        """
+        # The stash is reset on every run; placeholder ids index into it.
+        self.stashed_nodes: dict[str, etree.Element | str] = {}
+
+        # Ensure a valid parent list, but copy passed in lists
+        # to ensure we don't have the user accidentally change it on us.
+        tree_parents = [] if ancestors is None else ancestors[:]
+
+        # Child -> parent lookup, used by `__build_ancestors`.
+        self.parent_map = {c: p for p in tree.iter() for c in p}
+        # Work list of `(element, ancestor tag names)` pairs still to be visited.
+        stack = [(tree, tree_parents)]
+
+        while stack:
+            currElement, parents = stack.pop()
+
+            self.ancestors = parents
+            self.__build_ancestors(currElement, self.ancestors)
+
+            # `(child, new elements)` pairs; the insertion happens after the loop.
+            insertQueue = []
+            for child in currElement:
+                if child.text and not isinstance(
+                    child.text, util.AtomicString
+                ):
+                    # `child` counts as an ancestor while its own text is processed.
+                    self.ancestors.append(child.tag.lower())
+                    text = child.text
+                    child.text = None
+                    lst = self.__processPlaceholders(
+                        self.__handleInline(text), child
+                    )
+                    for item in lst:
+                        self.parent_map[item[0]] = child
+                    # The new elements are themselves queued for a visit.
+                    stack += lst
+                    insertQueue.append((child, lst))
+                    self.ancestors.pop()
+                if child.tail:
+                    tail = self.__handleInline(child.tail)
+                    # Throwaway element that receives the plain text preceding the
+                    # first element produced from the tail.
+                    dumby = etree.Element('d')
+                    child.tail = None
+                    tailResult = self.__processPlaceholders(tail, dumby, False)
+                    if dumby.tail:
+                        child.tail = dumby.tail
+                    # NOTE(review): this inserts into `currElement` while it is
+                    # being iterated; the new siblings are placed after `child`.
+                    pos = list(currElement).index(child) + 1
+                    tailResult.reverse()
+                    for newChild in tailResult:
+                        self.parent_map[newChild[0]] = currElement
+                        currElement.insert(pos, newChild[0])
+                if len(child):
+                    # `child` has children of its own: visit it later.
+                    self.parent_map[child] = currElement
+                    stack.append((child, self.ancestors[:]))
+
+            # Put the elements created from each child's text at the front of that child.
+            for element, lst in insertQueue:
+                for i, obj in enumerate(lst):
+                    newChild = obj[0]
+                    element.insert(i, newChild)
+        return tree
+
+
+class PrettifyTreeprocessor(Treeprocessor):
+    """ Insert line breaks into the element tree so the resulting html is readable. """
+
+    def _prettifyETree(self, elem: etree.Element) -> None:
+        """
+        Recursively add line breaks to `elem` and its block level children.
+
+        `code` and `pre` elements are not descended into. Blank text in front
+        of a block level first child and a blank tail become a newline.
+        """
+        newline = "\n"
+        is_block = self.md.is_block_level
+
+        if is_block(elem.tag) and elem.tag not in ('code', 'pre'):
+            text_is_blank = not (elem.text and elem.text.strip())
+            if text_is_blank and len(elem) and is_block(elem[0].tag):
+                elem.text = newline
+            for child in elem:
+                if is_block(child.tag):
+                    self._prettifyETree(child)
+
+        if not (elem.tail and elem.tail.strip()):
+            elem.tail = newline
+
+    def run(self, root: etree.Element) -> None:
+        """ Add line breaks to `root` and everything below it, modifying the tree in place. """
+
+        self._prettifyETree(root)
+
+        # `<br />`'s usually sit in the middle of inline content, where
+        # `_prettifyETree` does not reach them, so handle them here.
+        for br in root.iter('br'):
+            tail = br.tail
+            br.tail = ('\n%s' % tail) if tail and tail.strip() else '\n'
+
+        # Collapse trailing whitespace of code blocks into a single newline.
+        for pre in root.iter('pre'):
+            if not len(pre):
+                continue
+            code = pre[0]
+            if code.tag != 'code':
+                continue
+            # Only code elements holding nothing but text are touched.
+            if len(code) or code.text is None:
+                continue
+            code.text = util.AtomicString(code.text.rstrip() + '\n')
+
+
+class UnescapeTreeprocessor(Treeprocessor):
+    """ Restore characters that were escaped as `STX`, a decimal code point, `ETX`. """
+
+    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))
+
+    def _unescape(self, m: re.Match[str]) -> str:
+        """ Return the character whose code point is the number captured by `m`. """
+        code_point = int(m.group(1))
+        return chr(code_point)
+
+    def unescape(self, text: str) -> str:
+        """ Return `text` with every match of `RE` replaced by the character it encodes. """
+        return self.RE.sub(self._unescape, text)
+
+    def run(self, root: etree.Element) -> None:
+        """ Unescape the text, tail and attribute values of every element under `root`. """
+        restore = self.unescape
+        for elem in root.iter():
+            # Only the text of `code` elements is skipped; their tail and
+            # attributes are still unescaped.
+            if elem.tag != 'code' and elem.text:
+                elem.text = restore(elem.text)
+            if elem.tail:
+                elem.tail = restore(elem.tail)
+            for key, value in elem.items():
+                elem.set(key, restore(value))