TASVideos Pandoc

tasvideos_forum.lua at [53d358e416]
Login

tasvideos_forum.lua at [53d358e416]

File tasvideos_forum.lua artifact 9d35dd4bee part of check-in 53d358e416


     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
   141
   142
   143
   144
   145
   146
   147
   148
   149
   150
   151
   152
   153
   154
   155
   156
   157
   158
   159
   160
   161
   162
   163
   164
   165
   166
   167
   168
   169
   170
   171
   172
   173
   174
   175
   176
   177
   178
   179
   180
   181
   182
   183
   184
   185
   186
   187
   188
   189
   190
   191
   192
   193
   194
   195
   196
   197
   198
   199
   200
   201
   202
   203
   204
   205
   206
   207
   208
   209
   210
   211
   212
   213
   214
   215
   216
   217
   218
   219
   220
   221
   222
   223
   224
   225
   226
   227
   228
   229
   230
   231
   232
   233
   234
   235
   236
   237
   238
   239
   240
   241
   242
   243
   244
   245
   246
   247
   248
   249
   250
   251
   252
   253
   254
   255
   256
   257
   258
   259
   260
   261
   262
   263
   264
   265
   266
   267
   268
   269
   270
   271
   272
   273
   274
   275
   276
   277
   278
   279
   280
   281
   282
   283
   284
   285
   286
   287
   288
   289
   290
   291
   292
   293
   294
   295
   296
   297
   298
   299
   300
   301
   302
   303
   304
   305
   306
   307
   308
   309
   310
   311
   312
   313
   314
   315
   316
   317
   318
   319
   320
   321
   322
   323
   324
   325
   326
   327
   328
   329
   330
   331
   332
   333
   334
   335
   336
   337
   338
   339
   340
   341
   342
   343
   344
   345
   346
   347
   348
   349
   350
   351
   352
   353
   354
   355
   356
   357
   358
   359
   360
   361
   362
   363
   364
   365
   366
   367
   368
   369
   370
   371
   372
   373
   374
   375
   376
   377
   378
   379
   380
   381
   382
   383
   384
   385
   386
   387
   388
   389
   390
   391
   392
   393
   394
   395
   396
   397
   398
   399
   400
   401
   402
   403
   404
   405
   406
   407
   408
   409
   410
   411
   412
   413
   414
   415
   416
   417
   418
   419
   420
   421
   422
   423
   424
   425
   426
   427
   428
   429
   430
   431
   432
   433
   434
   435
   436
   437
   438
   439
   440
   441
   442
   443
   444
   445
   446
   447
   448
   449
   450
   451
   452
   453
   454
   455
   456
   457
   458
   459
   460
   461
   462
   463
   464
   465
   466
   467
   468
   469
   470
   471
   472
   473
   474
   475
   476
   477
   478
   479
   480
   481
   482
   483
   484
   485
   486
   487
   488
   489
   490
   491
   492
   493
   494
   495
   496
   497
   498
   499
   500
   501
   502
   503
   504
   505
   506
   507
   508
   509
   510
   511
   512
   513
   514
   515
   516
   517
   518
   519
   520
   521
   522
   523
   524
   525
   526
   527
   528
   529
   530
   531
   532
   533
   534
   535
   536
   537
   538
   539
   540
   541
   542
   543
   544
   545
   546
   547
   548
   549
   550
   551
   552
   553
   554
   555
   556
   557
   558
   559
   560
   561
   562
   563
   564
   565
   566
   567
   568
   569
   570
   571
   572
   573
   574
   575
   576
   577
   578
   579
   580
   581
   582
   583
   584
   585
   586
   587
   588
   589
   590
   591
   592
   593
   594
   595
   596
   597
   598
   599
   600
   601
   602
   603
   604
   605
   606
   607
   608
   609
   610
   611
   612
   613
   614
   615
   616
   617
   618
   619
   620
   621
   622
   623
   624
   625
   626
   627
   628
   629
   630
   631
   632
   633
   634
   635
   636
   637
   638
   639
   640
   641
   642
   643
   644
   645
   646
   647
   648
   649
   650
   651
   652
   653
   654
   655
   656
   657
   658
   659
   660
   661
   662
   663
   664
   665
   666
   667
   668
   669
   670
   671
   672
   673
   674
   675
   676
   677
   678
   679
   680
   681
   682
   683
   684
   685
   686
   687
   688
   689
   690
   691
   692
   693
   694
   695
   696
   697
   698
   699
   700
   701
   702
   703
   704
   705
   706
   707
   708
   709
   710
   711
   712
   713
   714
   715
   716
   717
   718
   719
   720
   721
   722
   723
   724
   725
   726
   727
   728
   729
   730
   731
   732
   733
   734
   735
   736
   737
   738
   739
   740
   741
   742
   743
   744
   745
   746
   747
   748
   749
   750
   751
   752
   753
   754
   755
   756
   757
   758
   759
   760
   761
   762
   763
   764
   765
   766
   767
   768
   769
   770
   771
   772
   773
   774
   775
   776
   777
   778
   779
   780
   781
   782
   783
   784
   785
   786
   787
   788
   789
   790
   791
   792
   793
   794
   795
   796
   797
   798
   799
   800
   801
   802
   803
   804
   805
   806
   807
   808
   809
   810
   811
   812
   813
   814
   815
   816
   817
   818
   819
   820
   821
   822
   823
   824
   825
   826
   827
   828
   829
   830
   831
   832
   833
   834
   835
   836
   837
   838
   839
   840
   841
   842
   843
   844
   845
   846
   847
   848
   849
   850
   851
   852
   853
   854
   855
   856
   857
   858
   859
   860
   861
   862
   863
   864
   865
   866
   867
   868
   869
   870
   871
   872
   873
   874
   875
   876
   877
   878
   879
   880
   881
   882
   883
   884
   885
   886
   887
   888
   889
   890
   891
   892
   893
   894
   895
   896
   897
   898
   899
   900
   901
   902
   903
   904
   905
   906
   907
   908
   909
   910
   911
   912
   913
   914
   915
   916
   917
   918
   919
   920
   921
   922
   923
   924
   925
   926
   927
   928
   929
   930
   931
   932
   933
   934
   935
   936
   937
   938
   939
   940
   941
   942
   943
   944
   945
   946
   947
   948
   949
   950
   951
   952
   953
   954
   955
   956
   957
   958
   959
   960
   961
   962
   963
   964
   965
   966
   967
   968
   969
   970
   971
   972
   973
   974
   975
   976
   977
   978
   979
   980
   981
   982
   983
   984
   985
   986
-- Pandoc custom writer for TASVideos forum markup.
-- https://tasvideos.org/ForumMarkup
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs
--
-- Usage (see https://pandoc.org/MANUAL.html#custom-readers-and-writers):
-- 	pandoc -t tasvideos_forum.lua input.md
-- tasvideos_forum.lua may be given as an absolute or relative path, or it can
-- be placed in the "custom" subdirectory of the user data directory:
-- https://pandoc.org/MANUAL.html#option--data-dir. Many input formats may be
-- used, see https://pandoc.org/MANUAL.html#option--from.
--
-- To output specific BBCode tags, inclusing TASVideos-specific markup such as
-- [movie] and [post], represent them in Markdown with raw attribute tags and a
-- format name of "tasvideos_forum" (see
-- https://pandoc.org/MANUAL.html#generic-raw-attribute). Like so:
-- 	This is `[frames]100[/frames]`{=tasvideos_forum} faster than `[movie]1234[/movie]`{=tasvideos_forum}.
--
-- 	```{=tasvideos_forum}
-- 	[note]
-- 	More details are in [post=1234]this post[/post].
-- 	[/note]
-- 	```
--
-- The download filename for code blocks can be provided by a "filename"
-- attribute, like so:
-- 	``` {filename=test.lua}
-- 	print("hello")
-- 	```
--
-- Image paths/URLs are converted to data URIs when the embed_resources
-- extension is activated. This is similar to the --embed-resources option for
-- the pandoc program.
-- 	pandoc -t tasvideos_forum.lua+embed_resources input.md

-- The format recognized by RawBlock and RawInline.
local MY_FORMAT = "tasvideos_forum"

Extensions = {
	-- Convert [img] tag paths/URLs to data URIs. We implement the flag as
	-- an extension because we cannot access the Pandoc --embed-resources
	-- option: https://github.com/jgm/pandoc/discussions/9978.
	embed_resources = false,
}

-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L80
-- The important piece of information about each BBCode tag, for writing, is
-- whether child elements are allowed in it or not. Some tags, like [b], always
-- allow new child elements; some, like [code], never allow child elements; and
-- some, like [url], allow child elements only if a parameter is set on the
-- tag. For example, in:
-- 	[url]http://example.com/[b]path[/b][/url]
-- the `[b]...[/b]` is a literal part of the URL path. But with a parameter:
-- 	[url=http://example.com/path]text [b]label[/b][/url]
-- the `[b]...[/b]` results in bold text.
--
-- There are also "void" elements that are self-closing and never get an end
-- tag, like [hr]. The [*] element for list items is also treated as a void
-- tag, marking the beginning of items but not enclosing them.

local TAGS = {
	b      = {nesting = true},
	i      = {nesting = true},
	u      = {nesting = true},
	s      = {nesting = true},
	sub    = {nesting = true},
	sup    = {nesting = true},
	tt     = {nesting = true},
	left   = {nesting = true},
	right  = {nesting = true},
	center = {nesting = true},

	quote  = {nesting = true},
	code   = {nesting = false},
	img    = {nesting = false},
	url    = {nesting = false, nesting_with_param = true},

	size   = {nesting = true},
	hr     = {nesting = false, void = true},

	list   = {nesting = true},
	["*"]  = {nesting = false, void = true},

	table  = {nesting = true},
	tr     = {nesting = true},
	td     = {nesting = true},
	th     = {nesting = true},
}

local function assert_not_nil(x, msg)
	assert(x ~= nil, msg)
	return x
end

local function lookup_tag(tag)
	return assert_not_nil(TAGS[tag], string.format("%q missing from TAGS", tag))
end

-- We first process the Pandoc AST into a linear sequence of tokens, where a
-- token is one of the yield_* types below: a start tag, an end tag, text to be
-- escaped, raw text to be output without escaping, a carriage return, a blank
-- line, or a special token that cancels blank lines.
--
-- The reason to emit a preliminary sequence of tokens, rather than just hav
-- each AST node produce some text directly, is for proper escaping of text
-- that may be broken across nodes--notably of hyperlinks, which are supposed
-- to be escaped to prevent auto-linkification by the BBCode parser. Consider
-- this HTML input:
-- 	<code><span>http:</span><span>//example.com</span></code>
-- Because HTML <span> has no representation in BBCode, if we were to have each
-- pandoc.Span node simply return its escaped text contents, the above would
-- render to the BBCode:
-- 	[tt]http://example.com[/tt]
-- Because the text was not a link in the input, it should not be a link in the
-- output. But this output text is not escaped and will wrongly be linkified by
-- the BBCode parser.
--
-- The intermediate sequence of tokens is a fix for this problem. For this
-- example, we first produce the tokens:
-- 	{type = "start_tag", tag = "tt"}
-- 	{type = "text", text = "http:"}
-- 	{type = "text", text = "//example.com"}
-- 	{type = "end_tag", tag = "tt"}
-- Then, there is an intermediate step that consolidates adjacent "text"
-- tokens:
-- 	{type = "start_tag", tag = "tt"}
-- 	{type = "text", text = "http://example.com"}
-- 	{type = "end_tag", tag = "tt"}
-- With adjacent text tokens being joined into complete strings, the BBCode
-- output can be properly escaped:
-- 	[tt]http[noparse]://[/noparse]example.com[/tt]
--
-- The cancel_blankline token is a workaround for a visual issue with list
-- rendering on TASVideos. In short, after a [list] element, we want only a
-- carriage return, not a blank line. By itself, a cancel_blankline token does
-- nothing. Adjacent to any number of blankline tokens (before or after), it
-- cancels the blanklines and turns them into a single cr token.

local function yield_start_tag(tag, param)
	coroutine.yield({type = "start_tag", tag = tag, param = param})
end

local function yield_end_tag(tag)
	coroutine.yield({type = "end_tag", tag = tag})
end

local function yield_text(text)
	coroutine.yield({type = "text", text = text})
end

local function yield_raw_text(text)
	coroutine.yield({type = "raw_text", text = text})
end

local function yield_cr()
	coroutine.yield({type = "cr"})
end

local function yield_blankline()
	coroutine.yield({type = "blankline"})
end

local function yield_cancel_blankline()
	coroutine.yield({type = "cancel_blankline"})
end

-- Keep track of footnote bodies in this table, in order to output the note
-- bodies the bottom of the document. The table is added to by Inlines.Note.
local footnotes = {}

-- Tables of tokenization functions for Pandoc node types. This is like the
-- pandoc.scaffolding.Writer machinery, though we do not actually use
-- pandoc.scaffolding.Writer. tokenize_doc is called on the top-level
-- pandoc.Pandoc, and in turn recursively calls tokenize_blocks and
-- tokenize_inlines, which consult the Blocks and Inlines tables of per-node
-- tokenization functions.

local Blocks = {}
local Inlines = {}

local function tokenize_blocks(blocks, opts)
	assert_not_nil(opts, "tokenize_blocks opts")
	for i, el in ipairs(blocks) do
		if i > 1 then yield_blankline() end
		local tokenize = assert(Blocks[el.tag], string.format("missing Blocks[%q]", el.tag))
		tokenize(el, opts)
	end
end

local function tokenize_inlines(inlines, opts)
	assert_not_nil(opts, "tokenize_inlines opts")
	for _, el in ipairs(inlines) do
		local tokenize = assert(Inlines[el.tag], string.format("missing Inlines[%q]", el.tag))
		tokenize(el, opts)
	end
end

-- Put start and end BBCode tags around some other tokens. This function yields
-- a start tag/param, then calls fn with no arguments, then yields an end tag.
--
-- param is optional, and for that matter tag is also optional: if tag is nil,
-- this function just calls fn, without adding any start or end tags. sep is an
-- optional function that returns tokens to insert between the tags and the
-- result of calling fn.
local function enclose(tag, param, fn, sep)
	if tag == nil then
		assert(param == nil, param)
		fn()
	else
		yield_start_tag(tag, param)
		if sep then sep() end
		fn()
		if sep then sep() end
		yield_end_tag(tag)
	end
end

local function enclose_blocks(tag, param, blocks, opts)
	enclose(tag, param, function()
		tokenize_blocks(blocks, opts)
	end)
end

local function enclose_inlines(tag, param, inlines, opts)
	enclose(tag, param, function()
		tokenize_inlines(inlines, opts)
	end)
end

local function enclose_text(tag, param, text)
	enclose(tag, param, function()
		yield_text(text)
	end)
end

-- Helper function for pandoc.BulletList and pandoc.OrderedList. param should
-- be either nil (for a bullet list) or "1" (for an ordered list).
local function tokenize_list(param, items, opts)
	enclose("list", param, function ()
		for i, item in ipairs(items) do
			if i > 1 then yield_cr() end
			yield_start_tag("*")
			tokenize_blocks(item, opts)
		end
	end, yield_cr)
	-- The Bootstrap CSS used by TASVideos has a large margin-bottom after
	-- top-level lists:
	-- https://github.com/twbs/bootstrap/blob/v5.3.1/dist/css/bootstrap.css#L294
	-- We want to emit a blank line between most block elements, but for
	-- [list] specifically, we want just a carriage return, because a blank
	-- line visually leaves too much space. The cancel_blankline token
	-- removes the blankline token that tokenize_blocks adds by default,
	-- and converts it to a cr token.
	--
	-- There's no margin-bottom after nested lists, but the rendering is
	-- the same whether we use a carriage return or a blank line in those
	-- cases, so there is no need to treat them specially.
	-- https://github.com/twbs/bootstrap/blob/v5.3.1/dist/css/bootstrap.css#L301
	yield_cancel_blankline()
end

function Blocks.BlockQuote(el, opts)
	enclose_blocks("quote", nil, el.content, opts)
end

function Blocks.BulletList(el, opts)
	tokenize_list(nil, el.content, opts)
end

-- CodeBlock element classes that are known not to be language tags.
local NON_LANGUAGE_CLASSES = {}
for _, class in ipairs({
	"numberLines",
	"sourceCode",
}) do
	NON_LANGUAGE_CLASSES[class] = true
end

function Blocks.CodeBlock(el, opts)
	-- The parameter to the [code] tag can be either a language name or a
	-- filename. If it contains a dot character, it's interpreted as a
	-- filename, otherwise as a language name:
	-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/Node.cs#L300
	-- We may get a language in el.attr.classes and/or a filename in
	-- el.attr.attributes["filename"]. We can represent at most one of
	-- them. We prefer a filename, as long as it contains at least one dot
	-- and will be recognized as such. Otherwise we take the first class
	-- that might plausibly be a language name (which must *not* contain a
	-- dot).
	local param = el.attr.attributes["filename"]
	if param == nil or not string.match(param, "%.") then
		-- The language tag, if present, is one of the members of
		-- el.attr.classes. Using the shortcut syntax,
		-- 	```haskell
		-- the language tag will be the first class. Other conventional
		-- class names may appear, such as numberLines and sourceCode.
		-- Take the first class that is not one of the known
		-- non-language classes, and that doesn't contain a dot, and
		-- interpret it as the language tag.
		param = el.attr.classes:find_if(function (class)
			return not NON_LANGUAGE_CLASSES[class] and not string.match(class, "%.")
		end)
	end
	enclose_text("code", param, el.text)
end

function Blocks.DefinitionList(el, opts)
	for i, item in ipairs(el.content) do
		local terms = item[1]
		local defns = item[2]
		if i > 1 then yield_cr() end
		enclose_inlines("b", nil, terms, opts)
		yield_cr()
		tokenize_blocks({pandoc.BulletList(defns)}, opts)
	end
end

function Blocks.Div(el, opts)
	tokenize_blocks(el.content, opts)
end

function Blocks.Figure(el, opts)
	-- Figure
	tokenize_blocks(el.content, opts)
	yield_cr()
	-- Caption
	enclose_blocks("b", nil, el.caption.long, opts)
end

function Blocks.Header(el, opts)
	-- There are no header elements in the BBCode markup, so fake it with
	-- bold and changing the font size.
	-- https://html.spec.whatwg.org/multipage/rendering.html#sections-and-headings
	local size = ({
		[1] = "2em",
		[2] = "1.5em",
		[3] = "1.17em",
		[4] = "1em",
		[5] = "0.83em",
	})[el.level] or "0.67em"
	local bold = pandoc.Inlines({pandoc.Strong(el.content)})
	enclose_inlines("size", size, bold, opts)
end

function Blocks.HorizontalRule(el, opts)
	yield_start_tag("hr")
end

function Blocks.LineBlock(el, opts)
	for i, line in ipairs(el.content) do
		if i > 1 then tokenize_inlines({pandoc.LineBreak()}, opts) end
		tokenize_inlines(line, opts)
	end
end

function Blocks.OrderedList(el, opts)
	assert(el.listAttributes.start == 1, el.listAttributes)
	-- ignore el.listAttributes.style
	-- ignore el.listAttributes.delimiter
	tokenize_list("1", el.content, opts)
end

function Blocks.Para(el, opts)
	tokenize_inlines(el.content, opts)
end

function Blocks.Plain(el, opts)
	tokenize_inlines(el.content, opts)
end

function Blocks.RawBlock(el, opts)
	if el.format == MY_FORMAT then
		yield_raw_text(el.text)
	else
		pandoc.log.warn(string.format("not rendered: %q", tostring(el)))
	end
end

-- We can represent most features of the pandoc.Table type, such as mid-table
-- headings, so we don't have to go to the extremity of calling
-- pandoc.utils.to_simple_table. But one thing we cannot represent is the
-- row_span and col_span of cells. This function breaks each m×n cell into 1×1
-- cells, with the original content in the upper left cell.
local function despan_table(el)
	-- A list of integers indicating for how many more rows the given
	-- column should hold a blank 1×1 cell. When the counter reaches zero,
	-- the corresponding element is set to nil and the column becomes
	-- eligible for new cells again.
	local pending = {}
	-- Decrement each element of pending by 1, and remove elements that
	-- become 0. Modifies pending in place.
	local function age_pending()
		for col in pairs(pending) do
			pending[col] = pending[col] - 1
			if pending[col] == 0 then
				pending[col] = nil
			end
		end
	end
	-- Chop up col_span cells and distribute them among already pending
	-- columns. Update pending according to the row_spans of the cells in
	-- this row. Modifies pending in place.
	local function despan_rows(rows)
		local new_rows = {}
		for _, row in ipairs(rows) do
			local new_cells = {}

			-- Age pending with each new row.
			age_pending()
			local col = 1
			for _, cell in ipairs(row.cells) do
				-- Insert blank 1×1 cells until finding a
				-- column that is not already pending.
				while pending[col] ~= nil do
					table.insert(new_cells, pandoc.Cell({}, "AlignDefault", 1, 1, cell.attr))
					col = col + 1
				end

				-- Insert this cell.
				table.insert(new_cells, pandoc.Cell(cell.contents, cell.alignment, 1, 1, cell.attr))
				pending[col] = cell.row_span
				col = col + 1

				-- Insert blanks up to this cell's col_span.
				-- Any columns touched become pending for the
				-- next row.
				for _ = 2, cell.col_span do
					table.insert(new_cells, pandoc.Cell({}, "AlignDefault", 1, 1, cell.attr))
					if pending[col] == nil then
						pending[col] = cell.row_span
					else
						pending[col] = math.max(pending[col], cell.row_span)
					end
					col = col + 1
				end
			end

			table.insert(new_rows, pandoc.Row(new_cells, row.attr))
		end
		return new_rows
	end

	local head = pandoc.TableHead(despan_rows(el.head.rows), el.head.attr)
	local bodies = {}
	for _, body in ipairs(el.bodies) do
		table.insert(bodies, {
			head = despan_rows(body.head),
			body = despan_rows(body.body),
			row_head_columns = body.row_head_columns,
			attr = body.attr,
		})
	end
	local foot = pandoc.TableFoot(despan_rows(el.foot.rows), el.foot.attr)
	-- Pandoc should have normalized the table it gave us, such that there
	-- are no row_spans that extend past the final row.
	age_pending()
	assert(next(pending) == nil, string.format("pending row_span at end of table"))

	return pandoc.Table(el.caption, el.colspecs, head, bodies, foot, el.attr)
end

local function table_cell_align_tag(cell_alignment, col_alignment)
	local alignment = cell_alignment
	if alignment == "AlignDefault" then
		alignment = col_alignment
	end
	if alignment == "AlignDefault" then
		return nil
	else
		return assert(({
			AlignLeft = "left",
			AlignRight = "right",
			AlignCenter = "center",
		})[alignment], string.format("unknown alignment %q", alignment))
	end
end

local function tokenize_table_cell(cell, cell_tag, col_alignment, opts)
	assert(cell.col_span == 1, string.format("col_span == %d", cell.col_span))
	assert(cell.row_span == 1, string.format("row_span == %d", cell.row_span))
	local align_tag = table_cell_align_tag(cell.alignment, col_alignment)
	enclose(cell_tag, nil, function ()
		enclose_blocks(align_tag, nil, cell.contents, opts)
	end)
end

-- If row_head_columns is nil, this is a header row: use [th] for every cell.
-- Otherwise row_head_columns is an integer that tells how many initial columns
-- to use [th] for; the rest will use [td].
local function tokenize_table_row(row, row_head_columns, col_alignments, opts)
	enclose("tr", nil, function ()
		local col = 1
		for i, cell in ipairs(row.cells) do
			if i > 1 then yield_cr() end
			local cell_tag
			if row_head_columns == nil or col <= row_head_columns then
				cell_tag = "th"
			else
				cell_tag = "td"
			end
			tokenize_table_cell(cell, cell_tag, col_alignments[col], opts)
			col = col + cell.col_span
		end
	end, yield_cr)
end

function Blocks.Table(el, opts)
	el = despan_table(el)

	-- Caption
	if next(el.caption.long) then
		enclose_blocks("b", nil, el.caption.long, opts)
		yield_cr()
	end
	-- Table
	local col_alignments = {}
	for i, colspec in ipairs(el.colspecs) do
		col_alignments[i] = colspec[1]
	end
	-- Helper function to yield a cr before every row but the first.
	local first = true
	local function yield_sep()
		if not first then yield_cr() end
		first = false
	end
	enclose("table", nil, function ()
		for _, row in ipairs(el.head.rows) do
			yield_sep()
			tokenize_table_row(row, nil, col_alignments, opts)
		end
		for _, body in ipairs(el.bodies) do
			for _, row in ipairs(body.head) do
				yield_sep()
				tokenize_table_row(row, nil, col_alignments, opts)
			end
			for _, row in ipairs(body.body) do
				yield_sep()
				tokenize_table_row(row, body.row_head_columns, col_alignments, opts)
			end
		end
		for _, row in ipairs(el.foot.rows) do
			yield_sep()
			tokenize_table_row(row, nil, col_alignments, opts)
		end
	end, yield_cr)
end

function Inlines.Cite(el, opts)
	-- You might want to set the link-citations metadata field to false
	-- (`-M link-citations=false` on the command line) to avoid #ref links
	-- to nowhere from appearing in citation markers.
	tokenize_inlines(el.content, opts)
end

function Inlines.Code(el, opts)
	enclose_text("tt", nil, el.text, opts)
end

function Inlines.Emph(el, opts)
	enclose_inlines("i", nil, el.content, opts)
end

function Inlines.Image(el, opts)
	-- ignore el.caption
	-- ignore el.title
	-- Include a size param only if w and h are provided and have integer
	-- values. (Or if just w is provided.)
	local w = el.attr.attributes.width  and tonumber(el.attr.attributes.width, 10)
	local h = el.attr.attributes.height and tonumber(el.attr.attributes.height, 10)
	local size
	if w and h then
		size = string.format("%dx%d", w, h)
	elseif w then
		size = string.format("%d", w)
	end
	-- Convert el.src to a data URI if the embed_resources extension is
	-- used.
	local src = el.src
	if opts.extensions:includes("embed_resources") then
		local mime_type, data = pandoc.mediabag.fetch(el.src)
		assert(mime_type, el.src)
		src = pandoc.mediabag.make_data_uri(mime_type, data)
	end
	enclose_text("img", size, src, opts)
end

function Inlines.LineBreak(el, opts)
	-- Always a literal newline character, not a collapsing "cr" token.
	-- The TASVideos forum CSS uses `white-space: pre-wrap`:
	-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos/wwwroot/css/partials/_customizations.scss#L367
	-- which means that every line break counts: multiple consecutive line
	-- breaks should be preserved and not collapsed.
	yield_text("\n")
end

function Inlines.Link(el, opts)
	-- If the element has the class "uri", this is a bare URL link. Prefer
	-- to output it as `[url]http://example.com[/url]`. But do this only if
	-- el.target and el.content are equal: they may differ due to percent
	-- escaping, for example.
	if el.attr.classes:includes("uri") and el.target == pandoc.utils.stringify(el.content) then
		enclose_inlines("url", nil, el.content, opts)
	else
		-- We expect Pandoc to have hex-escaped link.target, so it
		-- meets the param syntax check in enclose_inlines. Otherwise
		-- an error will be raised.
		enclose_inlines("url", el.target, el.content, opts)
	end
end

-- Convert a pandoc.Math element into plain Pandoc inlines, if possible. This
-- only works for math that is not too fancy. Returns the inlines if the
-- conversion is possible; otherwise returns nil. (In this respect it differs
-- from the Pandoc function texMathToInlines, which returns a fallback in case
-- of error.)
local function tex_math_to_inlines(mathtype, inp)
	-- We don't have access to Pandoc's texMathToInlines function, so we
	-- employ a hack to access indirectly. We create a new Pandoc document
	-- containing just the Math we are interested in and convert it to an
	-- HTML string, taking advantage of the fact that the HTML writer uses
	-- texMathToInlines when html_math_method = "plain" in WriterOptions.
	-- Then we parse the HTML string back into an AST to recover the
	-- inlines. We don't get an explicit error when the conversion fails
	-- inside the HTML, so in order to check for error, we do a preliminary
	-- conversion using the plain writer and check for the output starting
	-- with '$', which is true of the plain writer's fallback rendering.
	-- https://github.com/jgm/pandoc/discussions/11399

	-- Create a new pandoc.Pandoc containing just the desired Math element.
	local doc = pandoc.Pandoc({pandoc.Plain({pandoc.Math(mathtype, inp)})})
	-- Write to plain format to check if conversion is possible.
	local plain = pandoc.write(doc, "plain", {html_math_method = "plain", wrap_text = "wrap-none"})
	if string.match(plain, "^%$") then
		-- The output starts with '$': conversion failed.
		return nil
	end
	-- If the conversion succeeded for the plain writer, assume it will
	-- also succeed for the HTML writer.
	local html = pandoc.write(doc, "html", {html_math_method = "plain", wrap_text = "wrap-none"})
	-- Parse the HTML string back into a pandoc.Pandoc and extract the
	-- inlines.
	return pandoc.utils.blocks_to_inlines(pandoc.read(html, "html").blocks)
end

function Inlines.Math(el, opts)
	-- Represent the math using plain inlines, if possible.
	local inlines = tex_math_to_inlines(el.mathtype, el.text)
	if inlines then
		if el.mathtype == "InlineMath" then
			tokenize_inlines(inlines, opts)
		elseif el.mathtype == "DisplayMath" then
			yield_blankline()
			tokenize_blocks({pandoc.Plain(inlines)}, opts)
			yield_blankline()
		else
			error(el.mathtype)
		end
	else
		-- Otherwise, wrap the LaTeX in a Code or CodeBlock element.
		if el.mathtype == "InlineMath" then
			tokenize_inlines({pandoc.Code("$" .. el.text .. "$", {class = "latex"})}, opts)
		elseif el.mathtype == "DisplayMath" then
			tokenize_blocks({pandoc.CodeBlock("$$" .. el.text .. "$$", {class = "latex"})}, opts)
		else
			error(el.mathtype)
		end
	end
end

local function footnote_marker(n, opts)
	return pandoc.Inlines({pandoc.Superscript(pandoc.Str(n))})
end

function Inlines.Note(el, opts)
	-- Insert the note body into the global footnotes table, to output it
	-- at the end of tokenize_doc.
	table.insert(footnotes, el.content)
	-- Output the footnote marker.
	tokenize_inlines(footnote_marker(#footnotes), opts)
end

function Inlines.Quoted(el, opts)
	local q = assert(({
		SingleQuote = {open = [[']], close = [[']]},
		DoubleQuote = {open = [["]], close = [["]]},
	})[el.quotetype], el.quotetype)
	yield_text(q.open)
	tokenize_inlines(el.content, opts)
	yield_text(q.close)
end

function Inlines.RawInline(el, opts)
	if el.format == MY_FORMAT then
		yield_raw_text(el.text)
	else
		pandoc.log.warn(string.format("not rendered: %q", tostring(el)))
	end
end

function Inlines.SmallCaps(el, opts)
	tokenize_inlines(el.content, opts)
end

function Inlines.SoftBreak(el, opts)
	yield_text(" ")
end

function Inlines.Space(el, opts)
	-- A literal space character, not the reflowable pandoc.layout.space.
	-- Line breaks in the input are always interpreted as line breaks by
	-- the parser, so retain long lines.
	yield_text(" ")
end

function Inlines.Span(el, opts)
	tokenize_inlines(el.content, opts)
end

function Inlines.Str(el, opts)
	yield_text(el.text)
end

function Inlines.Strikeout(el, opts)
	enclose_inlines("s", nil, el.content, opts)
end

function Inlines.Strong(el, opts)
	enclose_inlines("b", nil, el.content, opts)
end

function Inlines.Subscript(el, opts)
	enclose_inlines("sub", nil, el.content, opts)
end

function Inlines.Superscript(el, opts)
	enclose_inlines("sup", nil, el.content, opts)
end

function Inlines.Underline(el, opts)
	enclose_inlines("u", nil, el.content, opts)
end

-- We maintain a stack to keep track of what BBCode tags are open as we write
-- the output. The context of open tags affects how the BBCode is parsed, and
-- therefore affects how we must do escaping in what we output. The main
-- consideration is whether the element at the top of the stack (the most
-- recent start tag) permits nested child elements.
--
-- If nested child elements are permitted, then the parser will look for and
-- interpret start tags and URLs in the input, and we must escape them (using
-- [noparse]) to prevent such interpretation.
--
-- If nested child elements are not permitted, then the parser will not
-- interpret anything that looks like a start tag or a URL, but copy it
-- verbatim to the output. Therefore we must *not* escape them. Most end tags
-- are copied to the output verbatim in the same way, with the sole exception
-- of an end tag that matches the element at the top of the tag stack. That one
-- end tag is looked for so that that open element can be closed. If we are
-- asked to output text that matches such an end tag, we raise an error,
-- because we have no way to escape it.
--
-- Before https://github.com/TASVideos/tasvideos/pull/2248, end tags in
-- no-children-allowed contexts would match anywhere on the stack, not just at
-- the top. But we don't have to worry about that.

local function nesting_allowed(stack)
	local head = stack[#stack]
	if head then
		local tag = lookup_tag(head.tag)
		return assert_not_nil(tag.nesting) or (head.param ~= nil and tag.nesting_with_param)
	else
		-- Empty stack means top level, tags are allowed.
		return true
	end
end

local function url_autolinking(stack)
	for _, elem in ipairs(stack) do
		if elem.tag == "url" then
			return false
		end
	end
	return true
end

-- Strings to escape with [noparse]. Generally we have to escape BBCode tags
-- and URLs. In the special case of being inside a [url] tag already, we can
-- get away with escaping just BBCode tags.
local ESCAPE_BBCODE_URL_RE = re.compile([[ "["+ / "://" ]])
local ESCAPE_BBCODE_RE = re.compile([[ "["+ ]])
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L457
-- Technically the character class should also exclude \p{C} (control and
-- "other" characters).
local CLOSING_TAG_PATTERN = "%[/([^%[%]=/]+)%]"

local function escape(text, stack)
	if nesting_allowed(stack) then
		-- Nested child elements are allowed here, so we may use
		-- noparse escaping. We must escape nested BBCode tags to
		-- prevent them from being interpreted by the parser, and
		-- URL-like strings to prevent them from being autolinked.
		local escape_re
		if url_autolinking(stack) then
			-- Inside a [url] tag, URL autolinking is disabled, so
			-- we don't escape URLs. Escaping them would do no
			-- harm, but we avoid doing so for cleaner markup.
			escape_re = ESCAPE_BBCODE_URL_RE
		else
			escape_re = ESCAPE_BBCODE_RE
		end
		-- Escaping just the "[" of a BBCode tag, like so:
		-- 	[noparse][[/noparse]/center]
		-- is more robust than escaping the whole tag:
		-- 	[noparse][/center][/noparse]
		-- This way, we can escape the strings "[noparse]" and
		-- "[/noparse]" themselves. Looking for and escaping the single
		-- character "[" is easier than matching tags precisely, at the
		-- possible cost of some harmless unnecessary escaping.
		return re.gsub(text, escape_re, "[noparse]%0[/noparse]")
	else
		-- Nested children are not allowed at this point, which means
		-- we cannot use noparse escaping. We do not need to escape
		-- start tags or URLs, and end tags are likewise safe as long
		-- as they do not match the tag at the top of the stack. Raise
		-- an error if an end tag matches the top of the stack and
		-- therefore would be misinterpreted by the parser.
		local head = assert_not_nil(stack[#stack])
		for tag in string.gmatch(text, CLOSING_TAG_PATTERN) do
			if tag == head.tag then
				error(string.format("cannot escape [/%s] in %q", tag, text))
			end
		end
		return text
	end
end

-- Check that a tag name is syntactically valid.
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L439
local function tag_is_valid(tag)
	return string.match(tag, "^[^%[%]=/]+$")
end

-- A BBCode parameter can contain '[' and ']' characters, but only if they are
-- in balanced pairs.
-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L441
-- Technically the character class should also exclude \p{C} (control and
-- "other" characters).
local PARAM_RE = re.compile([[
full <- seq !.
seq <- ([^][] / balanced)*
balanced <- "[" seq "]"
]])

local function param_is_valid(param)
	return re.match(param, PARAM_RE) ~= nil
end

local function start_tag(tag, param)
	assert(tag_is_valid(tag), tag)
	if param == nil then
		return "[" .. tag .. "]"
	else
		-- Check that param is syntactically valid.
		assert(param_is_valid(param), string.format("cannot escape param: %q", param))
		-- If param begins and ends with quote characters, they will
		-- be stripped by the parser. So add another pair of quotes to
		-- protect them.
		-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/BbParser.cs#L287
		if string.match(param, "^\".*\"$") then
			param = "\"" .. param .. "\""
		end
		return "[" .. tag .. "=" .. param .. "]"
	end
end

local function end_tag(tag)
	assert(tag_is_valid(tag), tag)
	return "[/" .. tag .. "]"
end

local function render_token(token, stack)
	if token.type == "start_tag" then
		return start_tag(token.tag, token.param)
	elseif token.type == "end_tag" then
		return end_tag(token.tag)
	elseif token.type == "text" then
		return escape(token.text, stack)
	elseif token.type == "raw_text" then
		return token.text
	elseif token.type == "cr" then
		return pandoc.layout.cr
	elseif token.type == "blankline" then
		return pandoc.layout.blankline
	else
		error(token.type)
	end
end

local function render_tokens(tokens)
	local parts = {}
	local stack = {}

	for token in tokens do
		if token.type == "end_tag" then
			local tag = assert(table.remove(stack), string.format("empty stack for %q", token.tag))
			assert(token.tag == tag.tag, string.format("popping %q, found %q", token.tag, tag.tag))
		end

		table.insert(parts, render_token(token, stack))

		if token.type == "start_tag" and not lookup_tag(token.tag).void then
			assert(nesting_allowed(stack), token.tag)
			table.insert(stack, {tag = token.tag, param = token.param})
		end
	end

	return pandoc.layout.concat(parts)
end

-- Merge adjacent text tokens and let cancel_blankline tokens cancel blankline
-- tokens.
local function consolidate_tokens(tokens)
	local text_buf = {}
	local function flush_text()
		if next(text_buf) then
			coroutine.yield({type = "text", text = table.concat(text_buf)})
			text_buf = {}
		end
	end
	local blanklines_count = 0
	local cancel_blankline = false
	local function flush_blanklines()
		if cancel_blankline then
			if blanklines_count > 0 then
				yield_cr()
			end
		else
			for i = 1, blanklines_count do
				yield_blankline()
			end
		end
		blanklines_count = 0
		cancel_blankline = false
	end
	for token in tokens do
		if token.type ~= "text" then
			flush_text()
		end
		if token.type ~= "blankline" and token.type ~= "cancel_blankline" then
			flush_blanklines()
		end
		if token.type == "text" then
			table.insert(text_buf, token.text)
		elseif token.type == "blankline" then
			blanklines_count = blanklines_count + 1
		elseif token.type == "cancel_blankline" then
			cancel_blankline = true
		else
			coroutine.yield(token)
		end
	end
	flush_text()
	flush_blanklines()
end

local function tokenize_doc(doc, opts)
	assert_not_nil(opts)

	tokenize_blocks(doc.blocks, opts)

	-- Output footnote bodies, if any.
	if next(footnotes) then
		yield_blankline()
		tokenize_blocks({pandoc.HorizontalRule()}, opts)
		yield_cr()
		-- The default font size is 12 = 1em:
		-- https://github.com/TASVideos/tasvideos/blob/b54ece055c14d7e0c2a2eb61603e42067ffd1912/TASVideos.ForumEngine/Node.cs#L463
		-- Take inspiration from TeX \footnotesize, which for 12pt is 10pt.
		enclose_blocks("size", "0.83em", {pandoc.OrderedList(footnotes)}, opts)
	end
end

function Writer(doc, opts)
	local tokens = coroutine.wrap(function () tokenize_doc(doc, opts) end)
	local tokens = coroutine.wrap(function () consolidate_tokens(tokens) end)
	return render_tokens(tokens):render()
end