initial commit
This commit is contained in:
@@ -0,0 +1,641 @@
|
||||
# Python Markdown
|
||||
|
||||
# A Python implementation of John Gruber's Markdown.
|
||||
|
||||
# Documentation: https://python-markdown.github.io/
|
||||
# GitHub: https://github.com/Python-Markdown/markdown/
|
||||
# PyPI: https://pypi.org/project/Markdown/
|
||||
|
||||
# Started by Manfred Stienstra (http://www.dwerg.net/).
|
||||
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
|
||||
# Currently maintained by Waylan Limberg (https://github.com/waylan),
|
||||
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
|
||||
|
||||
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
|
||||
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
|
||||
# Copyright 2004 Manfred Stienstra (the original version)
|
||||
|
||||
# License: BSD (see LICENSE.md for details).
|
||||
|
||||
"""
|
||||
A block processor parses blocks of text and adds new elements to the ElementTree. Blocks of text,
|
||||
separated from other text by blank lines, may have a different syntax and produce a differently
|
||||
structured tree than other Markdown. Block processors excel at handling code formatting, equation
|
||||
layouts, tables, etc.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import xml.etree.ElementTree as etree
|
||||
from typing import TYPE_CHECKING, Any
|
||||
from . import util
|
||||
from .blockparser import BlockParser
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from markdown import Markdown
|
||||
|
||||
logger = logging.getLogger('MARKDOWN')
|
||||
|
||||
|
||||
def build_block_parser(md: Markdown, **kwargs: Any) -> BlockParser:
    """ Create a `BlockParser` loaded with the default block processors.

    Each processor is instantiated with the new parser and registered under
    the name and priority listed in the table below. `**kwargs` is accepted
    but not used by this function.
    """
    block_parser = BlockParser(md)
    # (processor class, registry name, priority)
    default_processors = (
        (EmptyBlockProcessor, 'empty', 100),
        (ListIndentProcessor, 'indent', 90),
        (CodeBlockProcessor, 'code', 80),
        (HashHeaderProcessor, 'hashheader', 70),
        (SetextHeaderProcessor, 'setextheader', 60),
        (HRProcessor, 'hr', 50),
        (OListProcessor, 'olist', 40),
        (UListProcessor, 'ulist', 30),
        (BlockQuoteProcessor, 'quote', 20),
        (ReferenceProcessor, 'reference', 15),
        (ParagraphProcessor, 'paragraph', 10),
    )
    for processor_cls, name, priority in default_processors:
        block_parser.blockprocessors.register(processor_cls(block_parser), name, priority)
    return block_parser
|
||||
|
||||
|
||||
class BlockProcessor:
    """ Base class for block processors.

    Each subclass will provide the methods below to work with the source and
    tree. Each processor will need to define its own `test` and `run`
    methods. The `test` method should return True or False, to indicate
    whether the current block should be processed by this processor. If the
    test passes, the parser will call the processor's `run` method.

    Attributes:
        BlockProcessor.parser (BlockParser): The `BlockParser` instance this is attached to.
        BlockProcessor.tab_length (int): The tab length set on the `Markdown` instance.

    """

    def __init__(self, parser: BlockParser):
        self.parser = parser
        # Copied from the parser's `Markdown` instance so the helpers below can use it directly.
        self.tab_length = parser.md.tab_length

    def lastChild(self, parent: etree.Element) -> etree.Element | None:
        """ Return the last child of an `etree` element, or `None` if it has no children. """
        return parent[-1] if len(parent) else None

    def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
        """ Remove a tab from the front of each line of the given text.

        Lines are consumed from the top for as long as they either start with
        `length` spaces (which are stripped) or are blank (which become empty
        lines). The first line that is neither stops the scan.

        Keyword arguments:
            text: The text to detab.
            length: Number of leading spaces to remove. Defaults to `self.tab_length`.

        Returns:
            A tuple of the detabbed leading lines and the untouched remaining
            lines, each joined with newlines.
        """
        if length is None:
            length = self.tab_length
        indent = ' ' * length
        lines = text.split('\n')
        newtext = []
        for line in lines:
            if line.startswith(indent):
                newtext.append(line[length:])
            elif not line.strip():
                newtext.append('')
            else:
                # First dedented, non-blank line: everything from here on is "the rest".
                break
        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])

    def looseDetab(self, text: str, level: int = 1) -> str:
        """ Remove a tab from front of lines but allowing dedented lines.

        Every line that starts with `level` tabs worth of spaces has that
        indent removed; all other lines are passed through unchanged.

        Keyword arguments:
            text: The text to detab.
            level: Number of tab widths to remove from each indented line.
        """
        width = self.tab_length * level
        indent = ' ' * width
        return '\n'.join(
            line[width:] if line.startswith(indent) else line
            for line in text.split('\n')
        )

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Test for block type. Must be overridden by subclasses.

        As the parser loops through processors, it will call the `test`
        method on each to determine if the given block of text is of that
        type. This method must return a boolean `True` or `False`. The
        actual method of testing is left to the needs of that particular
        block type. It could be as simple as `block.startswith(some_string)`
        or a complex regular expression. As the block type may be different
        depending on the parent of the block (i.e. inside a list), the parent
        `etree` element is also provided and may be used as part of the test.

        Keyword arguments:
            parent: An `etree` element which will be the parent of the block.
            block: A block of text from the source which has been split at blank lines.
        """
        pass  # pragma: no cover

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """ Run processor. Must be overridden by subclasses.

        When the parser determines the appropriate type of a block, the parser
        will call the corresponding processor's `run` method. This method
        should parse the individual lines of the block and append them to
        the `etree`.

        Note that both `parent` and `blocks` are references to objects which
        should be edited in place. Each processor must make changes to the
        existing objects as there is no mechanism to return new/different
        objects to replace them.

        This means that this method should be adding `SubElements` or adding text
        to the parent, and should remove (`pop`) or add (`insert`) items to
        the list of blocks.

        If `False` is returned, this will have the same effect as returning `False`
        from the `test` method.

        Keyword arguments:
            parent: An `etree` element which is the parent of the current block.
            blocks: A list of all remaining blocks of the document.
        """
        pass  # pragma: no cover
|
||||
|
||||
|
||||
class ListIndentProcessor(BlockProcessor):
    """ Process children of list items.

    Example

        * a list item
            process this part

            or this part

    """

    ITEM_TYPES = ['li']
    """ List of tags used for list items. """
    LIST_TYPES = ['ul', 'ol']
    """ Types of lists this processor can operate on. """

    def __init__(self, *args):
        super().__init__(*args)
        # Matches one or more whole tab-widths of leading spaces.
        self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` for an indented block that belongs to a list.

        The block must start with a full tab of spaces, the parser must not
        already be in the `detabbed` state, and `parent` must either be a
        list item or have a list as its last child.
        """
        if not block.startswith(' ' * self.tab_length):
            return False
        if self.parser.state.isstate('detabbed'):
            return False
        if parent.tag in self.ITEM_TYPES:
            return True
        # Always answer with a real boolean (never a bare `0` for an empty parent).
        last = self.lastChild(parent)
        return last is not None and last.tag in self.LIST_TYPES

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Detab the next block and parse it into the appropriate list item. """
        block = blocks.pop(0)
        level, sibling = self.get_level(parent, block)
        block = self.looseDetab(block, level)

        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
            # It's possible that this parent has a `ul` or `ol` child list
            # with a member. If that is the case, then that should be the
            # parent. This is intended to catch the edge case of an indented
            # list whose first member was parsed previous to this point
            # see `OListProcessor`
            if len(parent) and parent[-1].tag in self.LIST_TYPES:
                self.parser.parseBlocks(parent[-1], [block])
            else:
                # The parent is already a `li`. Just parse the child block.
                self.parser.parseBlocks(parent, [block])
        elif sibling.tag in self.ITEM_TYPES:
            # The sibling is a `li`. Use it as parent.
            self.parser.parseBlocks(sibling, [block])
        elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
            # The parent is a list (`ol` or `ul`) which has children.
            # Assume the last child `li` is the parent of this block.
            if sibling[-1].text:
                # If the parent `li` has text, that text needs to be moved to a `p`
                # The `p` must be 'inserted' at beginning of list in the event
                # that other children already exist i.e.; a nested sub-list.
                p = etree.Element('p')
                p.text = sibling[-1].text
                sibling[-1].text = ''
                sibling[-1].insert(0, p)
            self.parser.parseChunk(sibling[-1], block)
        else:
            self.create_item(sibling, block)
        self.parser.state.reset()

    def create_item(self, parent: etree.Element, block: str) -> None:
        """ Create a new `li` and parse the block with it as the parent. """
        li = etree.SubElement(parent, 'li')
        self.parser.parseBlocks(li, [block])

    def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Element]:
        """ Get level of indentation based on list level.

        Returns:
            A tuple of the list nesting level reached and the element found at
            that level (the starting `parent` when no descent was possible).
        """
        # Get indent level. `INDENT_RE` only matches whole multiples of
        # `tab_length`, so integer division gives the exact number of tabs.
        m = self.INDENT_RE.match(block)
        indent_level = len(m.group(1)) // self.tab_length if m else 0
        if self.parser.state.isstate('list'):
            # We're in a tight-list - so we already are at correct parent.
            level = 1
        else:
            # We're in a loose-list - so we need to find parent.
            level = 0
        # Step through children of tree to find matching indent level.
        while indent_level > level:
            child = self.lastChild(parent)
            if (child is not None and
                    (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):
                if child.tag in self.LIST_TYPES:
                    level += 1
                parent = child
            else:
                # No more child levels. If we're short of `indent_level`,
                # we have a code block. So we stop here.
                break
        return level, parent
|
||||
|
||||
|
||||
class CodeBlockProcessor(BlockProcessor):
    """ Process code blocks. """

    def test(self, parent: etree.Element, block: str) -> bool:
        """ A code block is any block starting with a full tab of spaces. """
        indent = ' ' * self.tab_length
        return block.startswith(indent)

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Add the indented lines of the next block to a `pre > code` element.

        If the previous sibling is already a code block the text is appended
        to it; otherwise new `pre` and `code` elements are created. Any lines
        left over after detabbing are pushed back onto `blocks`.
        """
        previous = self.lastChild(parent)
        raw = blocks.pop(0)
        body, leftover = self.detab(raw)
        continues_code = (
            previous is not None and previous.tag == "pre" and
            len(previous) and previous[0].tag == "code"
        )
        if continues_code:
            # Blank lines do not start new code blocks, so extend the previous
            # one, restoring the line break that was lost when the source was
            # split into blocks.
            code = previous[0]
            code.text = util.AtomicString(
                '{}\n{}\n'.format(code.text, util.code_escape(body.rstrip()))
            )
        else:
            # Start a brand new code block.
            pre = etree.SubElement(parent, 'pre')
            code = etree.SubElement(pre, 'code')
            code.text = util.AtomicString('%s\n' % util.code_escape(body.rstrip()))
        if leftover:
            # Unindented line(s) followed the code. Queue them as the next
            # block so they are processed in turn.
            blocks.insert(0, leftover)
|
||||
|
||||
|
||||
class BlockQuoteProcessor(BlockProcessor):
    """ Process blockquotes. """

    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Match a block containing a `>` line, unless recursion is nearly exhausted. """
        if self.RE.search(block) is None:
            return False
        return not util.nearing_recursion_limit()

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Parse the next block into a (new or continued) `blockquote` element. """
        block = blocks.pop(0)
        found = self.RE.search(block)
        if found:
            quote_start = found.start()
            # Anything ahead of the first `>` line is parsed first, recursively.
            leading = block[:quote_start]
            self.parser.parseBlocks(parent, [leading])
            # Strip the `> ` marker from each quoted line.
            quoted_lines = block[quote_start:].split('\n')
            block = '\n'.join([self.clean(line) for line in quoted_lines])
        previous = self.lastChild(parent)
        if previous is not None and previous.tag == "blockquote":
            # Continue the blockquote produced by the previous block.
            quote = previous
        else:
            # Open a new blockquote.
            quote = etree.SubElement(parent, 'blockquote')
        # Parse the content with the blockquote as parent. The parser state is
        # changed so blockquotes embedded in lists use `p` tags.
        self.parser.state.set('blockquote')
        self.parser.parseChunk(quote, block)
        self.parser.state.reset()

    def clean(self, line: str) -> str:
        """ Remove `>` from beginning of a line. """
        if line.strip() == ">":
            return ""
        found = self.RE.match(line)
        return found.group(2) if found else line
|
||||
|
||||
|
||||
class OListProcessor(BlockProcessor):
    """ Process ordered list blocks. """

    TAG: str = 'ol'
    """ The tag used for the wrapping element. """
    STARTSWITH: str = '1'
    """
    The integer (as a string) with which the list starts. For example, if a list is initialized as
    `3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"`. Default: `"1"`.
    """
    LAZY_OL: bool = True
    """ Ignore `STARTSWITH` if `True`. """
    SIBLING_TAGS: list[str] = ['ol', 'ul']
    """
    Markdown does not require the type of a new list item match the previous list item type.
    This is the list of types which can be mixed.
    """

    # Extracts the leading integer from an item marker such as `3.`.
    # Compiled once here rather than on every `get_items` call.
    _INTEGER_RE = re.compile(r'(\d+)')

    def __init__(self, parser: BlockParser):
        super().__init__(parser)
        # Detect an item (`1. item`). `group(1)` contains contents of item.
        self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
        # Detect items on secondary lines. they can be of either list type.
        self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' %
                                   (self.tab_length - 1))
        # Detect indented (nested) items of either type
        self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
                                    (self.tab_length, self.tab_length * 2 - 1))

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` if the block starts with a list item of this processor's type. """
        return bool(self.RE.match(block))

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Parse the next block as list items and attach them to the right list.

        The items go into the previous sibling list if there is one, into
        `parent` itself if it is already a list, or into a newly created list
        element otherwise.
        """
        # Check for multiple items in one block.
        items = self.get_items(blocks.pop(0))
        sibling = self.lastChild(parent)

        if sibling is not None and sibling.tag in self.SIBLING_TAGS:
            # Previous block was a list item, so set that as parent
            lst = sibling
            # make sure previous item is in a `p` - if the item has text,
            # then it isn't in a `p`
            if lst[-1].text:
                # since it's possible there are other children for this
                # sibling, we can't just `SubElement` the `p`, we need to
                # insert it as the first item.
                p = etree.Element('p')
                p.text = lst[-1].text
                lst[-1].text = ''
                lst[-1].insert(0, p)
            # if the last item has a tail, then the tail needs to be put in a `p`
            # likely only when a header is not followed by a blank line
            lch = self.lastChild(lst[-1])
            if lch is not None and lch.tail:
                p = etree.SubElement(lst[-1], 'p')
                p.text = lch.tail.lstrip()
                lch.tail = ''

            # parse first block differently as it gets wrapped in a `p`.
            li = etree.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            firstitem = items.pop(0)
            self.parser.parseBlocks(li, [firstitem])
            self.parser.state.reset()
        elif parent.tag in ['ol', 'ul']:
            # this catches the edge case of a multi-item indented list whose
            # first item is in a blank parent-list item:
            #     * * subitem1
            #         * subitem2
            # see also `ListIndentProcessor`
            lst = parent
        else:
            # This is a new list so create parent with appropriate tag.
            lst = etree.SubElement(parent, self.TAG)
            # Check if a custom start integer is set
            if not self.LAZY_OL and self.STARTSWITH != '1':
                lst.attrib['start'] = self.STARTSWITH

        self.parser.state.set('list')
        # Loop through items in block, recursively parsing each with the
        # appropriate parent.
        for item in items:
            if item.startswith(' '*self.tab_length):
                # Item is indented. Parse with last item as parent
                self.parser.parseBlocks(lst[-1], [item])
            else:
                # New item. Create `li` and parse with it as parent
                li = etree.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [item])
        self.parser.state.reset()

    def get_items(self, block: str) -> list[str]:
        """ Break a block into list items.

        As a side effect, when `TAG` is `'ol'` the number of the first item
        is stored (as a string) in `self.STARTSWITH`.
        """
        items = []
        for line in block.split('\n'):
            m = self.CHILD_RE.match(line)
            if m:
                # This is a new list item
                # Check first item for the start index
                if not items and self.TAG == 'ol':
                    # Detect the integer value of first list item
                    self.STARTSWITH = self._INTEGER_RE.match(m.group(1)).group()
                # Append to the list
                items.append(m.group(3))
            elif self.INDENT_RE.match(line):
                # This is an indented (possibly nested) item.
                if items[-1].startswith(' '*self.tab_length):
                    # Previous item was indented. Append to that item.
                    items[-1] = '{}\n{}'.format(items[-1], line)
                else:
                    items.append(line)
            else:
                # This is another line of previous item. Append to that item.
                items[-1] = '{}\n{}'.format(items[-1], line)
        return items
|
||||
|
||||
|
||||
class UListProcessor(OListProcessor):
    """ Process unordered list blocks. """

    TAG: str = 'ul'
    """ The tag used for the wrapping element. """

    def __init__(self, parser: BlockParser):
        super().__init__(parser)
        # Override the parent's item pattern so that only bullet items
        # (`* item`, `+ item`, `- item`) match. `group(1)` holds the item's
        # contents.
        max_indent = self.tab_length - 1
        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % max_indent)
|
||||
|
||||
|
||||
class HashHeaderProcessor(BlockProcessor):
    """ Process Hash Headers. """

    # Detect a header at start of any line in block
    RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` if any line of the block is a hash header. """
        return bool(self.RE.search(block))

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Create an `h1`-`h6` element from the first hash header in the next block.

        Lines before the header are parsed first (recursively); lines after it
        are pushed back onto `blocks` for later processing.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()]  # All lines before header
            after = block[m.end():]     # All lines after header
            if before:
                # As the header was not the first line of the block and the
                # lines before the header must be parsed first,
                # recursively parse these lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE
            h = etree.SubElement(parent, 'h%d' % len(m.group('level')))
            h.text = m.group('header').strip()
            if after:
                # Insert remaining lines as first block for future parsing.
                if self.parser.state.isstate('looselist'):
                    # This is a weird edge case where a header is a child of a loose list
                    # and there is no blank line after the header. To ensure proper
                    # parsing, the line(s) after need to be detabbed. See #1443.
                    after = self.looseDetab(after)
                blocks.insert(0, after)
        else:  # pragma: no cover
            # This should never happen, but just in case...
            # `Logger.warn` is a deprecated alias; use `warning` and let the
            # logging module do the formatting.
            logger.warning("We've got a problem header: %r", block)
|
||||
|
||||
|
||||
class SetextHeaderProcessor(BlockProcessor):
    """ Process Setext-style Headers. """

    # A Setext header is a line of text underlined with `=` or `-`.
    # It has to occupy the first two lines of the block.
    RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` if the block opens with an underlined header. """
        return bool(self.RE.match(block))

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Turn the first two lines of the next block into an `h1` or `h2`. """
        lines = blocks.pop(0).split('\n')
        # An `=` underline gives a level 1 header, anything else level 2.
        level = 1 if lines[1].startswith('=') else 2
        header = etree.SubElement(parent, 'h%d' % level)
        header.text = lines[0].strip()
        remaining = lines[2:]
        if remaining:
            # More lines follow the header. Queue them for later processing.
            blocks.insert(0, '\n'.join(remaining))
|
||||
|
||||
|
||||
class HRProcessor(BlockProcessor):
    """ Process Horizontal Rules. """

    # Python's `re` module doesn't officially support atomic grouping. However you can fake it.
    # See https://stackoverflow.com/a/13577411/866026
    RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
    # With `MULTILINE` the pattern finds an `hr` on any line of a block.
    SEARCH_RE = re.compile(RE, re.MULTILINE)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` if any line of the block is a horizontal rule.

        On success the match object is kept on the instance for `run` to use.
        """
        found = self.SEARCH_RE.search(block)
        if found is None:
            return False
        self.match = found
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Insert an `hr` element, splitting the block around the matched rule. """
        block = blocks.pop(0)
        rule = self.match
        # Lines ahead of the rule are parsed first, recursively.
        leading = block[:rule.start()].rstrip('\n')
        if leading:
            self.parser.parseBlocks(parent, [leading])
        # The rule itself.
        etree.SubElement(parent, 'hr')
        # Lines after the rule go back on the queue for later parsing.
        trailing = block[rule.end():].lstrip('\n')
        if trailing:
            blocks.insert(0, trailing)
|
||||
|
||||
|
||||
class EmptyBlockProcessor(BlockProcessor):
    """ Process blocks that are empty or start with an empty line. """

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` for an empty block or one whose first character is a newline. """
        if not block:
            return True
        return block.startswith('\n')

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Consume the blank part of the next block.

        When the previous sibling is a code block, the consumed whitespace is
        appended to it so that blank lines inside code are preserved.
        """
        block = blocks.pop(0)
        if block:
            # The block starts with an empty line: consume just that one line
            # and queue whatever follows it for later.
            filler = '\n'
            remainder = block[1:]
            if remainder:
                blocks.insert(0, remainder)
        else:
            filler = '\n\n'
        previous = self.lastChild(parent)
        after_code = (
            previous is not None and previous.tag == 'pre' and
            len(previous) and previous[0].tag == 'code'
        )
        if after_code:
            # Last block is a code block. Append to preserve whitespace.
            code = previous[0]
            code.text = util.AtomicString('{}{}'.format(code.text, filler))
|
||||
|
||||
|
||||
class ReferenceProcessor(BlockProcessor):
    """ Process link references. """
    RE = re.compile(
        r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
    )

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Always `True`; `run` decides whether the block holds a reference. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """ Store the first link reference found in the next block.

        Returns:
            `True` if a reference was found and recorded in `md.references`
            (text around it is re-queued as separate blocks); `False` if not,
            in which case the block is put back untouched.
        """
        block = blocks.pop(0)
        found = self.RE.search(block)
        if found is None:
            # No match. Restore block.
            blocks.insert(0, block)
            return False

        ref_id = found.group(1).strip().lower()
        link = found.group(2).lstrip('<').rstrip('>')
        title = found.group(5) or found.group(6)
        self.parser.md.references[ref_id] = (link, title)

        trailing = block[found.end():]
        if trailing.strip():
            # Add any content after match back to blocks as separate block
            blocks.insert(0, trailing.lstrip('\n'))
        leading = block[:found.start()]
        if leading.strip():
            # Add any content before match back to blocks as separate block.
            # It is inserted last so that it ends up ahead of the trailing text.
            blocks.insert(0, leading.rstrip('\n'))
        return True
|
||||
|
||||
|
||||
class ParagraphProcessor(BlockProcessor):
    """ Process Paragraph blocks. """

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Always `True`: every block is accepted by this processor. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Add the next block to the tree as paragraph text.

        Blank blocks are discarded. Outside of the `list` parser state the
        text is wrapped in a new `p`; inside it, the text is attached directly
        to `parent` (or to the tail of its last child).
        """
        block = blocks.pop(0)
        if not block.strip():
            # Blank block. Throw it away.
            return

        if not self.parser.state.isstate('list'):
            # Create a regular paragraph
            paragraph = etree.SubElement(parent, 'p')
            paragraph.text = block.lstrip()
            return

        # The parent is a tight-list.
        #
        # Check for any children. This will likely only happen in a
        # tight-list when a header isn't followed by a blank line.
        # For example:
        #
        #     * # Header
        #     Line 2 of list item - not part of header.
        previous = self.lastChild(parent)
        if previous is None:
            # No children: the text belongs to the parent itself.
            if parent.text:
                parent.text = '{}\n{}'.format(parent.text, block)
            else:
                parent.text = block.lstrip()
        elif previous.tail:
            # Add to the text that already follows the last child.
            previous.tail = '{}\n{}'.format(previous.tail, block)
        else:
            # Start the text that follows the last child.
            previous.tail = '\n%s' % block
|
||||
Reference in New Issue
Block a user