TASVideos Pandoc: tasvideos_forum.lua at [53d358e416]

File tasvideos_forum.lua artifact 9d35dd4bee part of check-in 53d358e416

-- Pandoc custom writer for TASVideos forum markup.
-- https://tasvideos.org/ForumMarkup
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs
--
-- Usage (see https://pandoc.org/MANUAL.html#custom-readers-and-writers):
-- 	pandoc -t tasvideos_forum.lua input.md
-- tasvideos_forum.lua may be given as an absolute or relative path, or it can
-- be placed in the "custom" subdirectory of the user data directory:
-- https://pandoc.org/MANUAL.html#option--data-dir. Many input formats may be
-- used, see https://pandoc.org/MANUAL.html#option--from.
--
-- To output specific BBCode tags, including TASVideos-specific markup such as
-- [movie] and [post], represent them in Markdown with raw attribute tags and a
-- format name of "tasvideos_forum" (see
-- https://pandoc.org/MANUAL.html#generic-raw-attribute). Like so:
-- 	This is `[frames]100[/frames]`{=tasvideos_forum} faster than `[movie]1234[/movie]`{=tasvideos_forum}.
--
-- 	```{=tasvideos_forum}
-- 	[note]
-- 	More details are in [post=1234]this post[/post].
-- 	[/note]
-- 	```
--
-- The download filename for code blocks can be provided by a "filename"
-- attribute, like so:
-- 	``` {filename=test.lua}
-- 	print("hello")
-- 	```
--
-- Image paths/URLs are converted to data URIs when the embed_resources
-- extension is activated. This is similar to the --embed-resources option for
-- the pandoc program.
-- 	pandoc -t tasvideos_forum.lua+embed_resources input.md

-- The format name recognized by RawBlock and RawInline. Raw elements whose
-- format equals this string are written to the output verbatim; raw elements
-- of any other format are not rendered and produce a warning instead.
local MY_FORMAT = "tasvideos_forum"

-- Extensions understood by this writer, with their default settings. Inlines.Image
-- checks opts.extensions for "embed_resources" to decide whether to embed
-- image data.
Extensions = {
	-- Convert [img] tag paths/URLs to data URIs. We implement the flag as
	-- an extension because we cannot access the Pandoc --embed-resources
	-- option: https://github.com/jgm/pandoc/discussions/9978.
	embed_resources = false,
}

-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L80
-- The important piece of information about each BBCode tag, for writing, is
-- whether child elements are allowed in it or not. Some tags, like [b], always
-- allow new child elements; some, like [code], never allow child elements; and
-- some, like [url], allow child elements only if a parameter is set on the
-- tag. For example, in:
-- 	[url]http://example.com/[b]path[/b][/url]
-- the `[b]...[/b]` is a literal part of the URL path. But with a parameter:
-- 	[url=http://example.com/path]text [b]label[/b][/url]
-- the `[b]...[/b]` results in bold text.
--
-- There are also "void" elements that are self-closing and never get an end
-- tag, like [hr]. The [*] element for list items is also treated as a void
-- tag, marking the beginning of items but not enclosing them.

-- Known BBCode tags, keyed by tag name. Fields of each entry:
--   nesting            whether child elements are allowed inside the tag
--                      (consulted by nesting_allowed).
--   nesting_with_param if true, child elements are allowed when the start
--                      tag carries a parameter, even if nesting is false.
--   void               if true, the tag has no end tag and is never pushed
--                      onto the open-tag stack by render_tokens.
local TAGS = {
	-- Inline formatting and alignment.
	b      = {nesting = true},
	i      = {nesting = true},
	u      = {nesting = true},
	s      = {nesting = true},
	sub    = {nesting = true},
	sup    = {nesting = true},
	tt     = {nesting = true},
	left   = {nesting = true},
	right  = {nesting = true},
	center = {nesting = true},

	-- Quotations, verbatim content, images, and links.
	quote  = {nesting = true},
	code   = {nesting = false},
	img    = {nesting = false},
	url    = {nesting = false, nesting_with_param = true},

	-- Font size and horizontal rule.
	size   = {nesting = true},
	hr     = {nesting = false, void = true},

	-- Lists; [*] marks the start of an item and has no end tag.
	list   = {nesting = true},
	["*"]  = {nesting = false, void = true},

	-- Tables.
	table  = {nesting = true},
	tr     = {nesting = true},
	td     = {nesting = true},
	th     = {nesting = true},
}

-- Like assert, but only nil counts as failure (false is accepted). Returns x
-- unchanged on success; raises msg otherwise.
local function assert_not_nil(x, msg)
	local present = (x ~= nil)
	assert(present, msg)
	return x
end

-- Return the TAGS entry for the named tag, raising an error if the tag is not
-- listed in TAGS.
local function lookup_tag(tag)
	local info = TAGS[tag]
	local msg = string.format("%q missing from TAGS", tag)
	return assert_not_nil(info, msg)
end

-- We first process the Pandoc AST into a linear sequence of tokens, where a
-- token is one of the yield_* types below: a start tag, an end tag, text to be
-- escaped, raw text to be output without escaping, a carriage return, a blank
-- line, or a special token that cancels blank lines.
--
-- The reason to emit a preliminary sequence of tokens, rather than just have
-- each AST node produce some text directly, is for proper escaping of text
-- that may be broken across nodes--notably of hyperlinks, which are supposed
-- to be escaped to prevent auto-linkification by the BBCode parser. Consider
-- this HTML input:
-- 	<code><span>http:</span><span>//example.com</span></code>
-- Because HTML <span> has no representation in BBCode, if we were to have each
-- pandoc.Span node simply return its escaped text contents, the above would
-- render to the BBCode:
-- 	[tt]http://example.com[/tt]
-- Because the text was not a link in the input, it should not be a link in the
-- output. But this output text is not escaped and will wrongly be linkified by
-- the BBCode parser.
--
-- The intermediate sequence of tokens is a fix for this problem. For this
-- example, we first produce the tokens:
-- 	{type = "start_tag", tag = "tt"}
-- 	{type = "text", text = "http:"}
-- 	{type = "text", text = "//example.com"}
-- 	{type = "end_tag", tag = "tt"}
-- Then, there is an intermediate step that consolidates adjacent "text"
-- tokens:
-- 	{type = "start_tag", tag = "tt"}
-- 	{type = "text", text = "http://example.com"}
-- 	{type = "end_tag", tag = "tt"}
-- With adjacent text tokens being joined into complete strings, the BBCode
-- output can be properly escaped:
-- 	[tt]http[noparse]://[/noparse]example.com[/tt]
--
-- The cancel_blankline token is a workaround for a visual issue with list
-- rendering on TASVideos. In short, after a [list] element, we want only a
-- carriage return, not a blank line. By itself, a cancel_blankline token does
-- nothing. Adjacent to any number of blankline tokens (before or after), it
-- cancels the blanklines and turns them into a single cr token.

-- Yield a start_tag token for the given tag name and optional parameter.
local function yield_start_tag(tag, param)
	local token = {type = "start_tag", tag = tag, param = param}
	coroutine.yield(token)
end

-- Yield an end_tag token for the given tag name.
local function yield_end_tag(tag)
	local token = {type = "end_tag", tag = tag}
	coroutine.yield(token)
end

-- Yield a text token; its text is subject to escaping when rendered.
local function yield_text(text)
	local token = {type = "text", text = text}
	coroutine.yield(token)
end

-- Yield a raw_text token; its text is output without escaping.
local function yield_raw_text(text)
	local token = {type = "raw_text", text = text}
	coroutine.yield(token)
end

-- Yield a cr (carriage return) token.
local function yield_cr()
	local token = {type = "cr"}
	coroutine.yield(token)
end

-- Yield a blankline token.
local function yield_blankline()
	local token = {type = "blankline"}
	coroutine.yield(token)
end

-- Yield a cancel_blankline token, which turns adjacent blankline tokens into
-- a single cr during token consolidation.
local function yield_cancel_blankline()
	local token = {type = "cancel_blankline"}
	coroutine.yield(token)
end

-- Keep track of footnote bodies in this table, in order to output the note
-- bodies at the bottom of the document. Inlines.Note appends to the table,
-- and tokenize_doc reads it after the main blocks have been tokenized.
local footnotes = {}

-- Tables of tokenization functions for Pandoc node types. This is like the
-- pandoc.scaffolding.Writer machinery, though we do not actually use
-- pandoc.scaffolding.Writer. tokenize_doc is called on the top-level
-- pandoc.Pandoc, and in turn recursively calls tokenize_blocks and
-- tokenize_inlines, which consult the Blocks and Inlines tables of per-node
-- tokenization functions.

-- Maps a block element's tag (e.g. "Para") to a function (el, opts) that
-- yields the tokens for that element.
local Blocks = {}
-- Maps an inline element's tag (e.g. "Str") to a function (el, opts) that
-- yields the tokens for that element.
local Inlines = {}

-- Tokenize a list of block elements, yielding a blankline token between
-- consecutive blocks. Raises an error for a block type with no entry in
-- Blocks.
local function tokenize_blocks(blocks, opts)
	assert_not_nil(opts, "tokenize_blocks opts")
	local first = true
	for _, block in ipairs(blocks) do
		if first then
			first = false
		else
			yield_blankline()
		end
		local handler = assert(Blocks[block.tag], string.format("missing Blocks[%q]", block.tag))
		handler(block, opts)
	end
end

-- Tokenize a list of inline elements in order, with no separators. Raises an
-- error for an inline type with no entry in Inlines.
local function tokenize_inlines(inlines, opts)
	assert_not_nil(opts, "tokenize_inlines opts")
	for _, inline in ipairs(inlines) do
		local handler = assert(Inlines[inline.tag], string.format("missing Inlines[%q]", inline.tag))
		handler(inline, opts)
	end
end

-- Wrap the tokens produced by fn in a start tag and an end tag: yield the
-- start tag (with param, if any), call fn with no arguments, then yield the
-- end tag.
--
-- Both param and tag are optional. With a nil tag (param must then be nil as
-- well), fn is simply called and no tags are yielded. sep, if given, is a
-- function called once after the start tag and once before the end tag to
-- yield separator tokens; it is not called when tag is nil.
local function enclose(tag, param, fn, sep)
	if tag == nil then
		assert(param == nil, param)
		fn()
		return
	end

	local function separate()
		if sep then sep() end
	end

	yield_start_tag(tag, param)
	separate()
	fn()
	separate()
	yield_end_tag(tag)
end

-- Enclose the tokens of a list of blocks in the given tag (see enclose).
local function enclose_blocks(tag, param, blocks, opts)
	local function body()
		tokenize_blocks(blocks, opts)
	end
	enclose(tag, param, body)
end

-- Enclose the tokens of a list of inlines in the given tag (see enclose).
local function enclose_inlines(tag, param, inlines, opts)
	local function body()
		tokenize_inlines(inlines, opts)
	end
	enclose(tag, param, body)
end

-- Enclose a single text token in the given tag (see enclose).
local function enclose_text(tag, param, text)
	local function body()
		yield_text(text)
	end
	enclose(tag, param, body)
end

-- Shared implementation for pandoc.BulletList and pandoc.OrderedList. param
-- is nil for a bullet list or "1" for an ordered list; items is a list of
-- items, each of which is a list of blocks.
local function tokenize_list(param, items, opts)
	-- Each item is introduced by a [*] marker, with a carriage return
	-- between consecutive items.
	local function items_body()
		for index, item in ipairs(items) do
			if index ~= 1 then yield_cr() end
			yield_start_tag("*")
			tokenize_blocks(item, opts)
		end
	end
	enclose("list", param, items_body, yield_cr)

	-- Normally blocks are separated by a blank line, but after a [list]
	-- we want only a carriage return. The Bootstrap CSS used by TASVideos
	-- puts a large margin-bottom after top-level lists:
	-- https://github.com/twbs/bootstrap/blob/v5.3.1/dist/css/bootstrap.css#L294
	-- so a blank line would visually leave too much space. The
	-- cancel_blankline token removes the blankline token that
	-- tokenize_blocks adds by default, and converts it to a cr token.
	--
	-- Nested lists have no margin-bottom:
	-- https://github.com/twbs/bootstrap/blob/v5.3.1/dist/css/bootstrap.css#L301
	-- but they render the same with a carriage return or a blank line, so
	-- they do not need special treatment.
	yield_cancel_blankline()
end

-- A block quote becomes [quote]...[/quote].
function Blocks.BlockQuote(el, opts)
	local content = el.content
	enclose("quote", nil, function ()
		tokenize_blocks(content, opts)
	end)
end

-- A bullet list is a [list] with no parameter.
function Blocks.BulletList(el, opts)
	local items = el.content
	tokenize_list(nil, items, opts)
end

-- Set of CodeBlock element classes that are known not to be language tags.
local NON_LANGUAGE_CLASSES = {
	numberLines = true,
	sourceCode = true,
}

-- A code block becomes [code]...[/code], optionally with a parameter.
--
-- The [code] parameter can be a filename or a language name; the parser
-- treats it as a filename if it contains a dot character, otherwise as a
-- language name:
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/Node.cs#L300
-- The element may carry a language in el.attr.classes and/or a filename in
-- el.attr.attributes["filename"], but only one of them can be represented.
function Blocks.CodeBlock(el, opts)
	local function has_dot(s)
		return string.find(s, ".", 1, true) ~= nil
	end

	-- Prefer the filename, provided it contains a dot and will therefore
	-- be recognized as a filename.
	local param = el.attr.attributes["filename"]
	if param == nil or not has_dot(param) then
		-- Fall back to a language tag. With the shortcut syntax,
		-- 	```haskell
		-- the language is the first class, but conventional classes
		-- such as numberLines and sourceCode may also be present. Use
		-- the first class that is not a known non-language class and
		-- that contains no dot (so it is not mistaken for a filename).
		param = el.attr.classes:find_if(function (class)
			return not (NON_LANGUAGE_CLASSES[class] or has_dot(class))
		end)
	end
	enclose_text("code", param, el.text)
end

-- A definition list is written as a bold term on its own line, followed by a
-- bullet list of its definitions.
function Blocks.DefinitionList(el, opts)
	for index, entry in ipairs(el.content) do
		local terms, defns = entry[1], entry[2]
		if index ~= 1 then yield_cr() end
		enclose_inlines("b", nil, terms, opts)
		yield_cr()
		local definitions = pandoc.BulletList(defns)
		tokenize_blocks({definitions}, opts)
	end
end

-- A Div has no markup of its own; only its contents are written.
function Blocks.Div(el, opts)
	local content = el.content
	tokenize_blocks(content, opts)
end

-- A figure is written as its contents followed, on the next line, by its
-- caption in bold. When the caption is empty, neither the carriage return nor
-- the bold tags are emitted (matching how Blocks.Table treats an empty
-- caption), so no stray "[b][/b]" appears in the output.
function Blocks.Figure(el, opts)
	-- Figure
	tokenize_blocks(el.content, opts)
	-- Caption
	if next(el.caption.long) then
		yield_cr()
		enclose_blocks("b", nil, el.caption.long, opts)
	end
end

-- BBCode has no header elements, so a header is faked with bold text at a
-- font size that depends on the header level. The sizes follow
-- https://html.spec.whatwg.org/multipage/rendering.html#sections-and-headings
function Blocks.Header(el, opts)
	-- Sizes for levels 1 through 5; any other level uses "0.67em".
	local sizes = {"2em", "1.5em", "1.17em", "1em", "0.83em"}
	local size = sizes[el.level] or "0.67em"
	local strong = pandoc.Strong(el.content)
	enclose_inlines("size", size, pandoc.Inlines({strong}), opts)
end

-- A horizontal rule is the void tag [hr]: a start tag with no end tag.
function Blocks.HorizontalRule(el, opts)
	coroutine.yield({type = "start_tag", tag = "hr"})
end

-- A line block is written as its lines separated by line breaks.
function Blocks.LineBlock(el, opts)
	for index, line in ipairs(el.content) do
		if index ~= 1 then
			local line_break = pandoc.LineBreak()
			tokenize_inlines({line_break}, opts)
		end
		tokenize_inlines(line, opts)
	end
end

-- An ordered list is a [list=1]. Only lists that start at 1 are supported;
-- any other start number raises an error. The list's style and delimiter
-- attributes are ignored.
function Blocks.OrderedList(el, opts)
	local attrs = el.listAttributes
	assert(attrs.start == 1, attrs)
	tokenize_list("1", el.content, opts)
end

-- A paragraph is just its inline contents.
function Blocks.Para(el, opts)
	local content = el.content
	tokenize_inlines(content, opts)
end

-- Plain text is just its inline contents.
function Blocks.Plain(el, opts)
	local content = el.content
	tokenize_inlines(content, opts)
end

-- A raw block in our own format is output verbatim; a raw block in any other
-- format is dropped with a warning.
function Blocks.RawBlock(el, opts)
	if el.format ~= MY_FORMAT then
		pandoc.log.warn(string.format("not rendered: %q", tostring(el)))
		return
	end
	yield_raw_text(el.text)
end

-- We can represent most features of the pandoc.Table type, such as mid-table
-- headings, so we don't have to go to the extremity of calling
-- pandoc.utils.to_simple_table. But one thing we cannot represent is the
-- row_span and col_span of cells. This function breaks each m×n cell into 1×1
-- cells, with the original content in the upper left cell and blank cells
-- everywhere else the original cell extended.
--
-- Takes a pandoc.Table and returns a new pandoc.Table in which every cell has
-- row_span == 1 and col_span == 1. Raises an error if a row_span is still
-- pending after the last row of the table.
local function despan_table(el)
	-- A map from column number to the number of rows (counting the row
	-- that set it) for which the column is covered by a row_span. A
	-- column with an entry should hold a blank 1×1 cell. When the counter
	-- reaches zero, the entry is set to nil and the column becomes
	-- eligible for new cells again.
	local pending = {}
	-- Decrement each element of pending by 1, and remove elements that
	-- become 0. Modifies pending in place. (Assigning to existing fields,
	-- including assigning nil, is permitted during a pairs traversal.)
	local function age_pending()
		for col in pairs(pending) do
			pending[col] = pending[col] - 1
			if pending[col] == 0 then
				pending[col] = nil
			end
		end
	end
	-- Make a blank 1×1 cell with the given attr.
	local function blank_cell(attr)
		return pandoc.Cell({}, "AlignDefault", 1, 1, attr)
	end
	-- Chop up col_span cells and distribute them among already pending
	-- columns. Update pending according to the row_spans of the cells in
	-- this row. Modifies pending in place. Returns a new list of rows.
	local function despan_rows(rows)
		local new_rows = {}
		for _, row in ipairs(rows) do
			local new_cells = {}

			-- Age pending with each new row.
			age_pending()
			local col = 1
			for _, cell in ipairs(row.cells) do
				-- Insert blank 1×1 cells until finding a
				-- column that is not already pending.
				while pending[col] ~= nil do
					table.insert(new_cells, blank_cell(cell.attr))
					col = col + 1
				end

				-- Insert this cell.
				table.insert(new_cells, pandoc.Cell(cell.contents, cell.alignment, 1, 1, cell.attr))
				pending[col] = cell.row_span
				col = col + 1

				-- Insert blanks up to this cell's col_span.
				-- Any columns touched become pending for the
				-- next row.
				for _ = 2, cell.col_span do
					table.insert(new_cells, blank_cell(cell.attr))
					if pending[col] == nil then
						pending[col] = cell.row_span
					else
						pending[col] = math.max(pending[col], cell.row_span)
					end
					col = col + 1
				end
			end

			-- Columns after the last cell of this row may still be
			-- covered by row_spans from earlier rows (this is also
			-- the case for a row that has no cells of its own).
			-- Fill them with blank cells too, so that the row is
			-- not left short. There is no following cell to take
			-- an attr from, so use an empty one.
			while pending[col] ~= nil do
				table.insert(new_cells, blank_cell(pandoc.Attr()))
				col = col + 1
			end

			table.insert(new_rows, pandoc.Row(new_cells, row.attr))
		end
		return new_rows
	end

	-- Process the sections in document order, because pending carries
	-- over from one row to the next.
	local head = pandoc.TableHead(despan_rows(el.head.rows), el.head.attr)
	local bodies = {}
	for _, body in ipairs(el.bodies) do
		table.insert(bodies, {
			head = despan_rows(body.head),
			body = despan_rows(body.body),
			row_head_columns = body.row_head_columns,
			attr = body.attr,
		})
	end
	local foot = pandoc.TableFoot(despan_rows(el.foot.rows), el.foot.attr)
	-- Pandoc should have normalized the table it gave us, such that there
	-- are no row_spans that extend past the final row.
	age_pending()
	assert(next(pending) == nil, "pending row_span at end of table")

	return pandoc.Table(el.caption, el.colspecs, head, bodies, foot, el.attr)
end

-- Return the name of the alignment tag ("left", "right", or "center") for a
-- table cell, or nil if no alignment tag is needed. The cell's own alignment
-- takes precedence; if it is "AlignDefault", the column's alignment is used
-- instead. Raises an error for an unrecognized alignment.
local function table_cell_align_tag(cell_alignment, col_alignment)
	local effective = cell_alignment
	if effective == "AlignDefault" then
		effective = col_alignment
	end
	if effective == "AlignDefault" then
		return nil
	end
	local tag_names = {
		AlignLeft = "left",
		AlignRight = "right",
		AlignCenter = "center",
	}
	return assert(tag_names[effective], string.format("unknown alignment %q", effective))
end

-- Tokenize one 1×1 table cell, enclosed in cell_tag ("th" or "td"). If the
-- cell or its column has a non-default alignment, the contents are further
-- enclosed in the corresponding alignment tag. Raises an error if the cell
-- spans more than one row or column.
local function tokenize_table_cell(cell, cell_tag, col_alignment, opts)
	for _, field in ipairs({"col_span", "row_span"}) do
		assert(cell[field] == 1, string.format(field .. " == %d", cell[field]))
	end
	local align_tag = table_cell_align_tag(cell.alignment, col_alignment)
	local function aligned_contents()
		enclose_blocks(align_tag, nil, cell.contents, opts)
	end
	enclose(cell_tag, nil, aligned_contents)
end

-- Tokenize one table row as [tr]...[/tr], with one cell per line.
--
-- row_head_columns selects the cell tag: nil means this is a header row, in
-- which every cell uses [th]; an integer means that that many initial
-- columns use [th] and the remaining columns use [td].
local function tokenize_table_row(row, row_head_columns, col_alignments, opts)
	local function cells_body()
		local col = 1
		for index, cell in ipairs(row.cells) do
			if index ~= 1 then yield_cr() end
			local is_header = row_head_columns == nil or col <= row_head_columns
			local cell_tag = is_header and "th" or "td"
			tokenize_table_cell(cell, cell_tag, col_alignments[col], opts)
			col = col + cell.col_span
		end
	end
	enclose("tr", nil, cells_body, yield_cr)
end

-- A table is written as an optional bold caption line followed by
-- [table]...[/table], with one [tr] row per line. Cell spans are removed
-- first by despan_table.
function Blocks.Table(el, opts)
	el = despan_table(el)

	-- Caption, only if there is one.
	local caption = el.caption.long
	if next(caption) then
		enclose_blocks("b", nil, caption, opts)
		yield_cr()
	end

	-- The alignment is the first component of each colspec.
	local col_alignments = {}
	for i, colspec in ipairs(el.colspecs) do
		col_alignments[i] = colspec[1]
	end

	-- Tokenize a list of rows, yielding a cr before every row but the
	-- very first row of the table.
	local emitted_any = false
	local function emit_rows(rows, row_head_columns)
		for _, row in ipairs(rows) do
			if emitted_any then yield_cr() end
			emitted_any = true
			tokenize_table_row(row, row_head_columns, col_alignments, opts)
		end
	end

	enclose("table", nil, function ()
		-- Rows of the table head, of each body's intermediate head,
		-- and of the table foot are header rows (nil
		-- row_head_columns); only body rows use row_head_columns.
		emit_rows(el.head.rows, nil)
		for _, body in ipairs(el.bodies) do
			emit_rows(body.head, nil)
			emit_rows(body.body, body.row_head_columns)
		end
		emit_rows(el.foot.rows, nil)
	end, yield_cr)
end

-- A citation is written as its inline contents. Consider setting the
-- link-citations metadata field to false (`-M link-citations=false` on the
-- command line), to keep #ref links to nowhere out of the citation markers.
function Inlines.Cite(el, opts)
	local content = el.content
	tokenize_inlines(content, opts)
end

-- Inline code becomes [tt]...[/tt].
function Inlines.Code(el, opts)
	local code = el.text
	enclose_text("tt", nil, code)
end

-- Emphasis becomes [i]...[/i].
function Inlines.Emph(el, opts)
	local content = el.content
	enclose("i", nil, function ()
		tokenize_inlines(content, opts)
	end)
end

-- An image becomes [img]src[/img], or [img=WxH]src[/img] / [img=W]src[/img]
-- when a size is available. The image's caption and title are ignored.
function Inlines.Image(el, opts)
	local attributes = el.attr.attributes
	-- Parse the named attribute as a base-10 integer. Returns nil if the
	-- attribute is absent or is not an integer.
	local function int_attr(name)
		local value = attributes[name]
		return value and tonumber(value, 10)
	end

	-- A size param is included only if both width and height are usable,
	-- or if width alone is.
	local w, h = int_attr("width"), int_attr("height")
	local size
	if w then
		if h then
			size = string.format("%dx%d", w, h)
		else
			size = string.format("%d", w)
		end
	end

	-- With the embed_resources extension, fetch the image and replace
	-- its path/URL with a data URI.
	local src = el.src
	if opts.extensions:includes("embed_resources") then
		local mime_type, data = pandoc.mediabag.fetch(el.src)
		assert(mime_type, el.src)
		src = pandoc.mediabag.make_data_uri(mime_type, data)
	end

	enclose_text("img", size, src)
end

-- A line break is a literal newline in a text token, rather than a "cr"
-- token that could collapse with its neighbors. The TASVideos forum CSS uses
-- `white-space: pre-wrap`:
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos/wwwroot/css/partials/_customizations.scss#L367
-- so every line break counts, and consecutive line breaks must all be
-- preserved.
function Inlines.LineBreak(el, opts)
	coroutine.yield({type = "text", text = "\n"})
end

-- A link becomes [url]target[/url] for a bare URL, or
-- [url=target]content[/url] otherwise.
function Inlines.Link(el, opts)
	-- A bare URL link has the class "uri". Use the parameterless form
	-- only if el.target and the text of el.content are equal: they may
	-- differ due to percent escaping, for example.
	local is_bare_url = el.attr.classes:includes("uri")
		and el.target == pandoc.utils.stringify(el.content)

	-- In the parameter form, we expect Pandoc to have hex-escaped
	-- el.target so that it passes the param syntax check done when the
	-- start tag is rendered. Otherwise an error will be raised.
	local param = nil
	if not is_bare_url then
		param = el.target
	end
	enclose_inlines("url", param, el.content, opts)
end

-- Convert TeX math into plain Pandoc inlines, if possible. This only works
-- for math that is not too fancy. mathtype and inp are the math type and
-- source text of a pandoc.Math element. Returns the inlines if the
-- conversion is possible; otherwise returns nil. (In this respect it differs
-- from the Pandoc function texMathToInlines, which returns a fallback in case
-- of error.)
local function tex_math_to_inlines(mathtype, inp)
	-- We don't have access to Pandoc's texMathToInlines function, so we
	-- employ a hack to access it indirectly. The HTML writer uses
	-- texMathToInlines when html_math_method = "plain" in WriterOptions,
	-- so we write a document containing just the Math element to HTML
	-- and parse the HTML back into an AST to recover the inlines. The
	-- HTML writer gives no explicit error when the conversion fails, so
	-- we first write with the plain writer, whose fallback rendering
	-- starts with '$', and treat that as the failure indicator.
	-- https://github.com/jgm/pandoc/discussions/11399

	-- A document containing just the desired Math element.
	local math_el = pandoc.Math(mathtype, inp)
	local doc = pandoc.Pandoc({pandoc.Plain({math_el})})
	local function render(format)
		return pandoc.write(doc, format, {html_math_method = "plain", wrap_text = "wrap-none"})
	end

	-- Output starting with '$' means the conversion failed.
	if string.sub(render("plain"), 1, 1) == "$" then
		return nil
	end

	-- The plain writer succeeded; assume the HTML writer will too. Parse
	-- its output back into a pandoc.Pandoc and extract the inlines.
	local parsed = pandoc.read(render("html"), "html")
	return pandoc.utils.blocks_to_inlines(parsed.blocks)
end

-- Math is represented with plain inlines when tex_math_to_inlines can
-- convert it; otherwise the LaTeX source is wrapped in a Code (inline math)
-- or CodeBlock (display math) element. Display math is set off from the
-- surrounding text by blank lines. Raises an error for an unknown math type.
function Inlines.Math(el, opts)
	local inlines = tex_math_to_inlines(el.mathtype, el.text)
	local is_inline = el.mathtype == "InlineMath"
	if not is_inline and el.mathtype ~= "DisplayMath" then
		error(el.mathtype)
	end

	if inlines and is_inline then
		tokenize_inlines(inlines, opts)
	elseif inlines then
		yield_blankline()
		tokenize_blocks({pandoc.Plain(inlines)}, opts)
		yield_blankline()
	elseif is_inline then
		local code = pandoc.Code("$" .. el.text .. "$", {class = "latex"})
		tokenize_inlines({code}, opts)
	else
		local code_block = pandoc.CodeBlock("$$" .. el.text .. "$$", {class = "latex"})
		tokenize_blocks({code_block}, opts)
	end
end

-- Return the inlines for footnote marker number n: the number as a
-- superscript.
local function footnote_marker(n, opts)
	local number = pandoc.Str(n)
	return pandoc.Inlines({pandoc.Superscript(number)})
end

-- A footnote is written as a numbered marker in the text. The note body is
-- appended to the footnotes table, to be output by tokenize_doc at the end
-- of the document; the marker number is the note's position in that table.
function Inlines.Note(el, opts)
	footnotes[#footnotes + 1] = el.content
	local marker = footnote_marker(#footnotes)
	tokenize_inlines(marker, opts)
end

-- Quoted text is surrounded by straight quotation marks; the opening and
-- closing marks are the same character. Raises an error for an unknown quote
-- type.
function Inlines.Quoted(el, opts)
	local marks = {
		SingleQuote = [[']],
		DoubleQuote = [["]],
	}
	local mark = assert(marks[el.quotetype], el.quotetype)
	yield_text(mark)
	tokenize_inlines(el.content, opts)
	yield_text(mark)
end

-- A raw inline in our own format is output verbatim; a raw inline in any
-- other format is dropped with a warning.
function Inlines.RawInline(el, opts)
	if el.format ~= MY_FORMAT then
		pandoc.log.warn(string.format("not rendered: %q", tostring(el)))
		return
	end
	yield_raw_text(el.text)
end

-- Small caps have no markup here; only the contents are written.
function Inlines.SmallCaps(el, opts)
	local content = el.content
	tokenize_inlines(content, opts)
end

-- A soft break is written as a single space.
function Inlines.SoftBreak(el, opts)
	coroutine.yield({type = "text", text = " "})
end

-- A space is a literal space character, not the reflowable
-- pandoc.layout.space. The BBCode parser always interprets line breaks in
-- the input as line breaks, so long lines must be kept intact.
function Inlines.Space(el, opts)
	coroutine.yield({type = "text", text = " "})
end

-- A Span has no markup of its own; only its contents are written.
function Inlines.Span(el, opts)
	local content = el.content
	tokenize_inlines(content, opts)
end

-- A string is a text token, to be escaped when rendered.
function Inlines.Str(el, opts)
	coroutine.yield({type = "text", text = el.text})
end

-- Strikeout becomes [s]...[/s].
function Inlines.Strikeout(el, opts)
	local content = el.content
	enclose("s", nil, function ()
		tokenize_inlines(content, opts)
	end)
end

-- Strong emphasis becomes [b]...[/b].
function Inlines.Strong(el, opts)
	local content = el.content
	enclose("b", nil, function ()
		tokenize_inlines(content, opts)
	end)
end

-- Subscript becomes [sub]...[/sub].
function Inlines.Subscript(el, opts)
	local content = el.content
	enclose("sub", nil, function ()
		tokenize_inlines(content, opts)
	end)
end

-- Superscript becomes [sup]...[/sup].
function Inlines.Superscript(el, opts)
	local content = el.content
	enclose("sup", nil, function ()
		tokenize_inlines(content, opts)
	end)
end

-- Underline becomes [u]...[/u].
function Inlines.Underline(el, opts)
	local content = el.content
	enclose("u", nil, function ()
		tokenize_inlines(content, opts)
	end)
end

-- We maintain a stack to keep track of what BBCode tags are open as we write
-- the output. The context of open tags affects how the BBCode is parsed, and
-- therefore affects how we must do escaping in what we output. The main
-- consideration is whether the element at the top of the stack (the most
-- recent start tag) permits nested child elements.
--
-- If nested child elements are permitted, then the parser will look for and
-- interpret start tags and URLs in the input, and we must escape them (using
-- [noparse]) to prevent such interpretation.
--
-- If nested child elements are not permitted, then the parser will not
-- interpret anything that looks like a start tag or a URL, but copy it
-- verbatim to the output. Therefore we must *not* escape them. Most end tags
-- are copied to the output verbatim in the same way, with the sole exception
-- of an end tag that matches the element at the top of the tag stack. That one
-- end tag is looked for so that that open element can be closed. If we are
-- asked to output text that matches such an end tag, we raise an error,
-- because we have no way to escape it.
--
-- Before https://github.com/TASVideos/tasvideos/pull/2248, end tags in
-- no-children-allowed contexts would match anywhere on the stack, not just at
-- the top. But we don't have to worry about that.

-- Tell whether child elements are allowed in the context of the given stack
-- of open tags. Returns a true value if so, and a false value (false or nil)
-- if not. Only the tag at the top of the stack matters.
local function nesting_allowed(stack)
	local head = stack[#stack]
	if not head then
		-- Empty stack means top level, tags are allowed.
		return true
	end
	local info = lookup_tag(head.tag)
	local always = assert_not_nil(info.nesting)
	if always then
		return always
	end
	-- Some tags allow children only when they have a parameter.
	return head.param ~= nil and info.nesting_with_param
end

-- Tell whether URL autolinking is in effect for the given stack of open
-- tags: returns false if a "url" tag is open anywhere on the stack, and true
-- otherwise.
local function url_autolinking(stack)
	for index = 1, #stack do
		if stack[index].tag == "url" then
			return false
		end
	end
	return true
end

-- Strings to escape with [noparse]. Generally we have to escape BBCode tags
-- and URLs. In the special case of being inside a [url] tag already, we can
-- get away with escaping just BBCode tags.
-- Matches a run of one or more "[" characters, or the string "://".
local ESCAPE_BBCODE_URL_RE = re.compile([[ "["+ / "://" ]])
-- Matches a run of one or more "[" characters only.
local ESCAPE_BBCODE_RE = re.compile([[ "["+ ]])
-- A Lua pattern (not an re pattern) that matches an end tag such as [/code]
-- and captures the tag name.
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L457
-- Technically the character class should also exclude \p{C} (control and
-- "other" characters).
local CLOSING_TAG_PATTERN = "%[/([^%[%]=/]+)%]"

-- Escape text for output in the context of the given stack of open tags.
-- Returns the escaped text. Raises an error if the text cannot be escaped:
-- that is, if child elements are not allowed in this context and the text
-- contains an end tag matching the tag at the top of the stack.
local function escape(text, stack)
	if not nesting_allowed(stack) then
		-- Nested children are not allowed at this point, which means
		-- we cannot use noparse escaping. Start tags and URLs do not
		-- need escaping, and end tags are likewise safe unless they
		-- match the tag at the top of the stack, in which case the
		-- parser would misinterpret them and we have to give up.
		local head = assert_not_nil(stack[#stack])
		for tag in string.gmatch(text, CLOSING_TAG_PATTERN) do
			if tag == head.tag then
				error(string.format("cannot escape [/%s] in %q", tag, text))
			end
		end
		return text
	end

	-- Nested child elements are allowed here, so we may use noparse
	-- escaping. BBCode tags must be escaped to prevent the parser from
	-- interpreting them. URL-like strings must be escaped to prevent them
	-- from being autolinked, except inside a [url] tag, where autolinking
	-- is disabled. Escaping URLs there would do no harm, but we avoid it
	-- for cleaner markup.
	local escape_re = url_autolinking(stack) and ESCAPE_BBCODE_URL_RE or ESCAPE_BBCODE_RE

	-- Escaping just the "[" of a BBCode tag, like so:
	-- 	[noparse][[/noparse]/center]
	-- is more robust than escaping the whole tag:
	-- 	[noparse][/center][/noparse]
	-- because it also works for the strings "[noparse]" and "[/noparse]"
	-- themselves. Looking for the single character "[" is also easier
	-- than matching tags precisely, at the possible cost of some harmless
	-- unnecessary escaping.
	return re.gsub(text, escape_re, "[noparse]%0[/noparse]")
end

-- Check that a tag name is syntactically valid: non-empty and free of the
-- characters "[", "]", "=", and "/". Returns the tag name if it is valid, or
-- nil if it is not.
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L439
local function tag_is_valid(tag)
	local valid_tag_pattern = "^[^%[%]=/]+$"
	return string.match(tag, valid_tag_pattern)
end

-- A BBCode parameter can contain '[' and ']' characters, but only if they are
-- in balanced pairs.
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L441
-- Technically the character class should also exclude \p{C} (control and
-- "other" characters).
--
-- The grammar: "full" is a "seq" that extends to the end of the subject; a
-- "seq" is any number of non-bracket characters and "balanced" groups; and a
-- "balanced" group is a "seq" enclosed in a pair of brackets.
local PARAM_RE = re.compile([[
full <- seq !.
seq <- ([^][] / balanced)*
balanced <- "[" seq "]"
]])

-- Check that a tag parameter is syntactically valid, i.e., that the whole of
-- it matches PARAM_RE. Returns true or false.
local function param_is_valid(param)
	local matched = re.match(param, PARAM_RE)
	return matched ~= nil
end

-- Return the text of a start tag, "[tag]" or "[tag=param]". Raises an error
-- if the tag name or the param is not syntactically valid.
local function start_tag(tag, param)
	assert(tag_is_valid(tag), tag)
	if param == nil then
		return "[" .. tag .. "]"
	end

	-- Check that param is syntactically valid.
	assert(param_is_valid(param), string.format("cannot escape param: %q", param))
	-- The parser strips a pair of quote characters from the beginning and
	-- end of a param. If param itself begins and ends with quote
	-- characters, add another pair of quotes to protect them.
	-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L287
	local is_quoted = string.match(param, "^\".*\"$")
	if is_quoted then
		param = "\"" .. param .. "\""
	end
	return string.format("[%s=%s]", tag, param)
end

-- Return the text of an end tag, "[/tag]". Raises an error if the tag name
-- is not syntactically valid.
local function end_tag(tag)
	assert(tag_is_valid(tag), tag)
	return string.format("[/%s]", tag)
end

-- Render a single token to a string or pandoc.layout element. stack is the
-- stack of currently open tags, which determines how text tokens are
-- escaped. Raises an error for an unknown token type.
local function render_token(token, stack)
	local kind = token.type
	local renderers = {
		start_tag = function () return start_tag(token.tag, token.param) end,
		end_tag   = function () return end_tag(token.tag) end,
		text      = function () return escape(token.text, stack) end,
		raw_text  = function () return token.text end,
		cr        = function () return pandoc.layout.cr end,
		blankline = function () return pandoc.layout.blankline end,
	}
	local render = renderers[kind]
	if render == nil then
		error(kind)
	end
	return render()
end

-- Render a sequence of tokens (given as an iterator function) to a single
-- pandoc.layout document, keeping track of the stack of open tags along the
-- way. Raises an error if the tags are not properly nested, or if a start
-- tag appears where child elements are not allowed.
local function render_tokens(tokens)
	local parts, stack = {}, {}

	for token in tokens do
		local kind = token.type

		-- An end tag is rendered in the context of its parent, so pop
		-- the stack before rendering, checking that the end tag
		-- matches the most recent open tag.
		if kind == "end_tag" then
			local top = assert(table.remove(stack), string.format("empty stack for %q", token.tag))
			assert(token.tag == top.tag, string.format("popping %q, found %q", token.tag, top.tag))
		end

		table.insert(parts, render_token(token, stack))

		-- A start tag is likewise rendered in the context of its
		-- parent, so push it only after rendering. Void tags have no
		-- end tag and are never pushed.
		if kind == "start_tag" and not lookup_tag(token.tag).void then
			assert(nesting_allowed(stack), token.tag)
			stack[#stack + 1] = {tag = token.tag, param = token.param}
		end
	end

	return pandoc.layout.concat(parts)
end

-- Filter a sequence of tokens (given as an iterator function), yielding a
-- new sequence in which adjacent text tokens are merged into one, and in
-- which each run of blankline and cancel_blankline tokens is resolved:
-- without a cancel_blankline in the run, the blanklines pass through
-- unchanged; with one, the blanklines (if any) are replaced by a single cr.
-- No cancel_blankline tokens are yielded.
local function consolidate_tokens(tokens)
	-- Texts of the current run of text tokens.
	local text_buf = {}
	-- State of the current run of blankline/cancel_blankline tokens.
	local blanklines_count = 0
	local cancel_blankline = false

	local function flush_text()
		if #text_buf == 0 then
			return
		end
		coroutine.yield({type = "text", text = table.concat(text_buf)})
		text_buf = {}
	end

	local function flush_blanklines()
		if not cancel_blankline then
			for _ = 1, blanklines_count do
				yield_blankline()
			end
		elseif blanklines_count > 0 then
			yield_cr()
		end
		blanklines_count = 0
		cancel_blankline = false
	end

	for token in tokens do
		local kind = token.type
		if kind == "text" then
			-- Text ends a run of blanklines, and extends a run of
			-- text.
			flush_blanklines()
			table.insert(text_buf, token.text)
		elseif kind == "blankline" then
			flush_text()
			blanklines_count = blanklines_count + 1
		elseif kind == "cancel_blankline" then
			flush_text()
			cancel_blankline = true
		else
			-- Any other token ends both kinds of run, and passes
			-- through unchanged.
			flush_text()
			flush_blanklines()
			coroutine.yield(token)
		end
	end
	flush_text()
	flush_blanklines()
end

-- Tokenize a whole pandoc.Pandoc document: its blocks, followed by the
-- bodies of any footnotes collected along the way, as a numbered list in a
-- smaller font below a horizontal rule.
local function tokenize_doc(doc, opts)
	assert_not_nil(opts)

	tokenize_blocks(doc.blocks, opts)

	-- Nothing more to do unless there are footnote bodies to output.
	if next(footnotes) == nil then
		return
	end

	yield_blankline()
	tokenize_blocks({pandoc.HorizontalRule()}, opts)
	yield_cr()
	-- The default font size is 12 = 1em:
	-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/Node.cs#L463
	-- Take inspiration from TeX \footnotesize, which for 12pt is 10pt.
	local notes = pandoc.OrderedList(footnotes)
	enclose_blocks("size", "0.83em", {notes}, opts)
end

-- Entry point of the custom writer. Runs the three stages as a pipeline of
-- coroutines (tokenize the document, consolidate the tokens, and render the
-- tokens) and returns the rendered output.
function Writer(doc, opts)
	local raw_tokens = coroutine.wrap(function ()
		tokenize_doc(doc, opts)
	end)
	local consolidated_tokens = coroutine.wrap(function ()
		consolidate_tokens(raw_tokens)
	end)
	local layout_doc = render_tokens(consolidated_tokens)
	return layout_doc:render()
end