module "markdown parser"
; txt - data definition helper.
; Usage:  label txt 'text', $0d, $0a, ...
; Emits all the given items as bytes at <label>, defines <label>.length as the
; number of bytes emitted, and appends one zero byte after them. The zero byte
; is emitted after .length is computed, so it is not counted in .length.
struc txt [string] {
common
. db string
.length = $ - .
db 0
}
; Fixed HTML text placed before and after the generated document body
; ($0d, $0a is a CR LF pair). Each label also gets a .length constant from the
; txt struc. Neither string is referenced by the procedures visible in this
; section - presumably they are written by the caller; TODO confirm.
HTMLHeader txt '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8" /></head><body>', $0d, $0a
HTMLFooter txt $0d, $0a, '</body></html>', $0d, $0a
; Unit used to size the HTML output buffer below.
SizeWriteBuffer = 8192
; Uninitialized data (everything between uglobal/endg is a "?" / rd / rb reservation).
uglobal
; pSource dd ?
; Capacity of the input buffer while ReadTheInputSlow is reading;
; number of source bytes actually read once the read procedure returns.
SizeSource dd ?
; Write position inside HTML left by FirstPass; SecondPass continues from it.
tHTML dd ?
; 65536 dword slots indexed by the 16-bit folded hash of a link label.
; FirstPass fills it and SecondPass searches it; an all-zero slot is treated as free.
HashTable rd 65536
; Output buffer. Both passes flush it to STDOUT at the start of a line once the
; write pointer has reached HTML.middle, so the SizeWriteBuffer bytes from
; .middle onward are the room left for the output of one more line.
HTML rb 4*SizeWriteBuffer
.middle rb SizeWriteBuffer
endg
; Initial input buffer size used by ReadTheInputSlow; also the minimum free
; space it keeps available before issuing the next read.
START_SIZE = 1024*1024
; ReadTheInputSlow - reads STDIN to the end without knowing its size in advance.
;
; The data is stored starting at [_MemoryFreeSpace]. The buffer starts with
; START_SIZE bytes and is re-requested through SpaceAllocate, 1.5 times larger
; (rounded up to a 4 KiB multiple), whenever less than START_SIZE bytes are free.
; Reading stops when FileRead returns CF=1 or zero bytes.
;
; Out:   [SizeSource] = number of bytes read
;        a zero dword is stored right after the data
;        edi = start of the data, esi = number of bytes read, eax = 0
; Regs:  edi = buffer base, esi = bytes read so far, eax/ecx = scratch
proc ReadTheInputSlow
begin
        mov     edi, [_MemoryFreeSpace]         ; edi = start of the input buffer
        mov     [SizeSource], START_SIZE
        xor     esi, esi                        ; nothing read yet

.grow:
        stdcall SpaceAllocate, [SizeSource]

.fill:
        mov     ecx, [SizeSource]
        sub     ecx, esi                        ; ecx = free bytes in the buffer
        lea     eax, [edi+esi]                  ; eax = first free byte
        stdcall FileRead, [STDIN], eax, ecx
        jc      .done                           ; a read error ends the input as well
        test    eax, eax
        jz      .done                           ; zero bytes read -> end of input

        add     esi, eax

  if options.BenchmarkRead = 1
        push    eax
        stdcall PrintMsg, cRead, eax, cCRLF
        pop     eax
  end if

; keep reading while at least START_SIZE bytes are still free
        mov     eax, [SizeSource]
        sub     eax, esi
        cmp     eax, START_SIZE
        jae     .fill

; otherwise grow: new size = old size * 3 / 2, rounded up to a whole 4 KiB page
        mov     eax, [SizeSource]
        lea     eax, [eax+2*eax]
        shr     eax, 1
        add     eax, $fff
        and     eax, not $fff
        mov     [SizeSource], eax
;       shl     [SizeSource], 1

  if options.BenchmarkMemory = 1
        pushad
        mov     eax, [SizeSource]
        shr     eax, 10                         ; bytes -> KiB for the message
        stdcall PrintMsg, cReallocated, eax, cKBytes
        stdcall Print, cCRLF
        popad
  end if

        jmp     .grow

.done:
        xor     eax, eax
        mov     [edi+esi], eax                  ; terminate the text with four zero bytes
        mov     [SizeSource], esi               ; report the real length
        return
endp
; ReadTheInput - reads the whole STDIN with a single request, using its size
; as reported by FileSize.
;
; The data is stored at [_MemoryFreeSpace]; the allocation request is the file
; size plus 256 bytes of reserve, rounded up to a 4 KiB multiple.
;
; Out:   CF = 0: [SizeSource] = number of bytes actually read, a zero dword is
;                stored right after them, edi = start of the data, eax = 0
;        CF = 1: FileSize or FileRead reported an error; nothing was published
; Uses:  eax, ebx, edi
proc ReadTheInput
begin
        stdcall FileSize, [STDIN]
        jc      .finish
        mov     ebx, eax                        ; ebx = reported size
        add     eax, 256+$fff                   ; some reserve
        and     eax, $fffff000
        stdcall SpaceAllocate, eax
        stdcall FileRead, [STDIN], [_MemoryFreeSpace], ebx
        jc      .finish                         ; do not publish a buffer that was never filled
        mov     ebx, eax                        ; use the count FileRead returned (as ReadTheInputSlow
                                                ; does); it can be less than the reported size
        mov     [SizeSource], ebx
        xor     eax, eax
        mov     edi, [_MemoryFreeSpace]
  if defined pSource
        mov     [pSource], edi                  ; only when pSource is declared somewhere; its
                                                ; declaration in this file is commented out
  end if
        mov     [edi+ebx], eax                  ; terminate the text with four zero bytes
        clc
.finish:
        return
endp
; FirstPass - scans the zero terminated source at [_MemoryFreeSpace] line by line and
;   * for every line starting with '#' writes one table-of-contents entry
;         <a id="_XXXX" href="#XXXX">header text</a><br>
;     into the HTML buffer, where XXXX is a 4 letter id derived from the header number;
;   * for every line starting with '[' that contains ']' stores the pointer to
;     the line in HashTable, keyed by the FNV-1a hash of the label (spaces and
;     tabs ignored) folded to 16 bits. Collisions are resolved by linear probing.
; All other lines are skipped. The table of contents ends with '<hr>' CR LF.
;
; Out:   [tHTML] = edi = write position in HTML after the table of contents
; Regs:  esi = read pointer, edi = write pointer; eax, ebx, ecx, edx are scratch
;
; The header ids must match the ones generated by SecondPass, which assigns an
; id to EVERY line starting with '#'. Therefore a header line without text does
; not get a table-of-contents entry, but still consumes one id here.
;
; NOTE: the buffer is flushed only at line start, so the output generated for a
; single line must fit in the space from HTML.middle to the end of the buffer.
proc FirstPass
.header_id dd ?
.idString dd ?
begin
        mov     edi, HTML
        mov     esi, [_MemoryFreeSpace]
        mov     [.header_id], 0

.line_start:
; flush the output buffer when it is filled up to HTML.middle
        cmp     edi, HTML.middle
        jb      @f
        sub     edi, HTML
        stdcall FileWrite, [STDOUT], HTML, edi
        mov     edi, HTML
@@:
        cmp     byte [esi], '#'
        je      .header
        cmp     byte [esi], '['
        je      .link

.skip_to_eol:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, $0d
        je      .line_start
        cmp     al, $0a
        je      .line_start
        test    al, al
        jnz     .skip_to_eol

.end_of_file:
        mov     dword [edi], '<hr>'
        mov     word [edi+4], $0a0d
        lea     edi, [edi+6]
        mov     [tHTML], edi
        return

; create table of contents item.
.header:
; skip the leading '#' characters, spaces and tabs
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, '#'
        je      .header
        cmp     al, ' '
        je      .header
        cmp     al, $09
        je      .header
        cmp     al, $0d
        je      .empty_header
        cmp     al, $0a
        je      .empty_header

; create the id: four base-26 digits of the header number, least significant first
        mov     eax, [.header_id]
        mov     ecx, 26
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString], dl
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString+1], dl
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString+2], dl
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString+3], dl
        inc     [.header_id]

; <a id="_XXXX" href="#XXXX">
        mov     dword [edi], '<a i'
        lea     edi, [edi+4]
        mov     dword [edi], 'd="_'
        lea     edi, [edi+4]
        pushd   [.idString]
        popd    [edi]
        lea     edi, [edi+4]
        mov     dword [edi], '" hr'
        mov     dword [edi+4], 'ef="'
        mov     byte [edi+8], '#'
        push    [.idString]
        pop     dword [edi+9]
        mov     word [edi+13], '">'
        lea     edi, [edi+15]

; copy the header text
        dec     esi                             ; back to the first character of the text
.copy_loop:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, $0d
        je      .end_header
        cmp     al, $0a
        je      .end_header
        test    al, al
        jz      .end_header
        mov     [edi], al
        lea     edi, [edi+1]
        jmp     .copy_loop

.end_header:
        dec     esi                             ; leave the terminator for .line_start/.skip_to_eol
        mov     dword [edi], '</a>'
        mov     dword [edi+4], '<br>'
        lea     edi, [edi+8]
        jmp     .line_start

; header line without any text: no table-of-contents entry, but the id is
; consumed, because SecondPass assigns an id to this line as well.
.empty_header:
        inc     [.header_id]
        jmp     .line_start

;link processing
.link:
        mov     edx, $811C9DC5                  ; 2166136261 ; FNV offset basis
        mov     ecx, esi                        ; ecx = pointer to the '[' that starts the line
.label_loop:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, $20
        je      .label_loop
        cmp     al, $09
        je      .label_loop
        cmp     al, $0d
        je      .line_start                     ; no ']' on this line - nothing is stored
        cmp     al, $0a
        je      .line_start
        test    al, al
        jz      .end_of_file
        xor     dl, al
        imul    edx, $01000193                  ; 16777619 ; FNV prime
        cmp     al, ']'
        jne     .label_loop

; fold the hash to 16 bit value...
        mov     ebx, edx
        shr     ebx, 16
        xor     ebx, edx
        and     ebx, $ffff
        mov     edx, ebx                        ; edx = first probed slot, to detect a full table

  if options.Benchmark
        inc     [LinkCount]
  end if

; search free slot
.search_slot:
        xchg    ecx, [HashTable+4*ebx]          ; store the pointer, get the old slot content
        test    ecx, ecx
        jz      .skip_to_eol                    ; the slot was free - done

; conflict...
  if options.Benchmark
        inc     [HashConflicts]
  end if

; compare the labels on ecx (the old content of the slot) with [HashTable+4*ebx] (the new pointer)
        push    edi ecx
        mov     edi, [HashTable+4*ebx]

.first:
        mov     al, [ecx]
        lea     ecx, [ecx+1]
        cmp     al, $20
        je      .first
        cmp     al, $09
        je      .first

.second:
        mov     ah, [edi]
        lea     edi, [edi+1]
        cmp     ah, $20
        je      .second
        cmp     ah, $09
        je      .second

        cmp     al, ah
        jne     .add_not_equal
        cmp     ah, ']'
        jne     .first

; the same label: the pointer just stored replaces the old one
        pop     ecx edi
        jmp     .skip_to_eol

; save it to the next slot
.add_not_equal:
        pop     ecx edi                         ; ecx = the displaced pointer, goes to the next slot
        inc     ebx
        and     ebx, $ffff
        cmp     ebx, edx
        jne     .search_slot

        int3    ; full hash table -> to be implemented error processing
endp
; Bit flags kept in the local .state variable of SecondPass.
fstateHeader = $01      ; a header tag is open; its back link and closing tag are written at end of line
fstatePara = $02        ; a <p> tag has been written and not yet closed or removed
fstateWhite = $04       ; the previous character was a space/tab or the line has just started
fstateBold = $08        ; <b> is open (toggled by '*')
fstateLink = $10        ; <a href="..."> is open; the next ']' closes it
fstateUnderline = $20   ; <u> is open (toggled by '_')
fstateStrikeout = $40   ; <s> is open (toggled by '-')
fstateItalic = $80      ; <i> is open (toggled by '/')
; SecondPass - converts the zero terminated source at [_MemoryFreeSpace] to HTML.
; The output is appended to the HTML buffer starting at [tHTML] and written to
; STDOUT (at line start when the buffer is filled up to HTML.middle, and at the end).
;
;   * a line starting with '[' is skipped entirely;
;   * a line starting with '#' becomes <hN id="XXXX"> ... </hN> (N = number of
;     '#', at most 6) followed by a back link to "#_XXXX"; XXXX is generated
;     from the header number the same way as in FirstPass;
;   * '*', '_', '-' and '/' toggle <b>, <u>, <s> and <i>. A tag is opened only
;     after white space and when the next character is not a space, tab, CR or
;     LF; it is closed by the next occurrence of the same character;
;   * '[' after white space starts a link when its label is found in HashTable:
;     <a href="address">; the following ']' writes </a>;
;   * a line that follows a line without visible characters opens ' <p>'.
;
; Regs:  esi = read pointer, edi = write pointer; eax, ebx, ecx, edx are scratch
;        al must keep the end-of-line character (0 at end of file) from
;        .end_of_line until it is tested at .crlf.
proc SecondPass
.non_space dd ?
.state dd ?
.last_header dd ?
.header_id dd ?
.idString dd ?
begin
        mov     esi, [_MemoryFreeSpace]
        xor     eax, eax
        mov     [.state], eax
        mov     [.non_space], eax
        mov     [.header_id], eax
        mov     edi, [tHTML]

.line_start:
; flush the output buffer when it is filled up to HTML.middle
        cmp     edi, HTML.middle
        jb      @f
        sub     edi, HTML
        stdcall FileWrite, [STDOUT], HTML, edi
        mov     edi, HTML
@@:
        xor     ecx, ecx
        xchg    [.non_space], ecx               ; ecx = visible characters on the previous line
        cmp     byte [esi], '['
        je      .skip
        cmp     byte [esi], '#'
        je      .header
        test    ecx, ecx
        jnz     .scan_line                      ; the previous line is not WS
        mov     dword [edi], ' <p>'
        lea     edi, [edi+4]
        or      [.state], fstatePara

.scan_line:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, ' '
        je      .white_space
        cmp     al, $09                         ; tab
        je      .white_space
        cmp     al, $0d
        je      .end_of_line
        cmp     al, $0a
        je      .end_of_line
        test    al, al
        jz      .end_of_line

; it is not white space character...
        inc     [.non_space]
        mov     ecx, fstateBold
        mov     ebx, '<b>'
        cmp     al, '*'
        je      .ProcessTag
        mov     ecx, fstateUnderline
        mov     ebx, '<u>'
        cmp     al, '_'
        je      .ProcessTag
        mov     ecx, fstateStrikeout
        mov     ebx, '<s>'
        cmp     al, '-'
        je      .ProcessTag
        mov     ecx, fstateItalic
        mov     ebx, '<i>'
        cmp     al, '/'
        je      .ProcessTag
        cmp     al, '['
        je      .is_link_begin
        cmp     al, ']'
        je      .is_link_end

.normal_char:
        and     [.state], not fstateWhite
.markup_ok:
        mov     [edi], al
        lea     edi, [edi+1]
        jmp     .scan_line

; ecx = state mask
; ebx = html tag
.ProcessTag:
        test    [.state], ecx
        jz      .start_tag

; close tag: turn '<x>' into '</x>'
        shl     ebx, 8
        mov     bx, '</'
        mov     [edi], ebx
        lea     edi, [edi+4]
        not     ecx
        and     [.state], ecx
        jmp     .scan_line

.start_tag:
        test    [.state], fstateWhite
        jz      .normal_char
        cmp     byte [esi], $20
        je      .normal_char
        cmp     byte [esi], $09
        je      .normal_char
        cmp     byte [esi], $0d
        je      .normal_char
        cmp     byte [esi], $0a
        je      .normal_char

; open tag
        mov     [edi], ebx                      ; stores '<x>' plus a zero byte...
        lea     edi, [edi+3]                    ; ...but the tag is 3 bytes, so the zero is overwritten
        or      [.state], ecx
        jmp     .scan_line

.white_space:
        or      [.state], fstateWhite
        mov     [edi], al
        lea     edi, [edi+1]
        jmp     .scan_line

; end of line and end of file processing
.end_of_line:
        test    al, al
        jz      @f
; swallow the second character of a CR LF (or LF CR) pair
        xor     al, $0d xor $0a
        cmp     [esi], al
        jne     @f
        inc     esi
@@:
        or      [.state], fstateWhite
        cmp     [.non_space], 0
        jne     .para_ok
        test    [.state], fstatePara
        jz      .para_ok
; line without visible characters while a paragraph is open
        cmp     word [edi-2], $0a0d
        je      .end_para
        sub     edi, 3                          ; <p>
        jmp     .clear_para

.end_para:
        sub     edi, 2
        mov     dword [edi], '</p>'
        lea     edi, [edi+4]

.clear_para:
        and     [.state], not fstatePara

.para_ok:
        test    [.state], fstateHeader
        jz      .crlf

; insert back link
        mov     dword [edi], ' <a '
        lea     edi, [edi+4]
        mov     dword [edi], 'href'
        lea     edi, [edi+4]
        mov     dword [edi], '="#_'
        lea     edi, [edi+4]
        push    [.idString]
        pop     dword [edi]
        lea     edi, [edi+4]
        mov     dword [edi], $86e23e22          ; ">arrow
        lea     edi, [edi+4]
        mov     byte [edi], $a9                 ; the end of the arrow char; al is NOT used here -
                                                ; it is still needed by the test at .crlf
        lea     edi, [edi+1]
        mov     dword [edi], '</a>'
        lea     edi, [edi+4]
        mov     byte [edi], '<'
        lea     edi, [edi+1]
        mov     ecx, [.last_header]
        add     ecx, '/h0>'                     ; the level is added to the '0' character
        mov     [edi], ecx
        lea     edi, [edi+4]
        and     [.state], not fstateHeader

.crlf:
        mov     word [edi], $0a0d
        lea     edi, [edi+2]
        test    al, al                          ; al = 0 only when the line ended on the terminator
        jnz     .line_start

.finish:
        sub     edi, HTML
        stdcall FileWrite, [STDOUT], HTML, edi
        mov     edi, HTML
        return

.is_link_begin:
        test    [.state], fstateWhite
        jz      .normal_char
        dec     esi                             ; back to the '['

;.SearchLink:
        push    edi
        push    esi

; first compute the hash for the link label
        mov     edx, $811C9DC5                  ; 2166136261 ; FNV offset basis
.hashloop:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, $20
        je      .hashloop
        cmp     al, $09
        je      .hashloop
        cmp     al, $0d
        je      .not_a_link
        cmp     al, $0a
        je      .not_a_link
        test    al, al
        je      .not_a_link
        xor     dl, al
        imul    edx, $01000193                  ; 16777619 ; FNV prime
        cmp     al, ']'
        jne     .hashloop

; fold the hash to 16 bit value...
        mov     ebx, edx
        shr     ebx, 16
        xor     ebx, edx
;       and     ebx, $ffff
        dec     ebx                             ; compensates the increment at .hash_loop

.hash_loop:
        lea     ebx, [ebx+1]
        and     ebx, $ffff
        mov     edi, [HashTable+4*ebx]
        mov     esi, [esp]                      ; esi = the '[' in the text
        test    edi, edi
        jnz     .cmp_labels

.not_a_link:
        pop     esi edi
        mov     al, [esi]
        lea     esi, [esi+1]
        jmp     .normal_char                    ; the link address was not found, so it is not link at all.

; compare the label in the text (esi) with the stored one (edi), ignoring spaces and tabs
.cmp_labels:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, ' '
        je      .cmp_labels
        cmp     al, $09
        je      .cmp_labels

.target:
        mov     ah, [edi]
        lea     edi, [edi+1]
        cmp     ah, ' '
        je      .target
        cmp     ah, $09
        je      .target

        cmp     al, ah
        jne     .hash_loop                      ; it is not the same label - try again.
        cmp     ah, ']'
        jne     .cmp_labels

; the labels are equal, so search the address.
.firstnw:
        mov     al, [edi]
        lea     edi, [edi+1]
        cmp     al, ' '
        je      .firstnw
        cmp     al, $09
        je      .firstnw

        dec     edi
        mov     edx, edi                        ; edx = first character of the address
        pop     esi edi

; yes the link was found.
        or      [.state], fstateLink
        mov     dword [edi], '<a h'
        lea     edi, [edi+4]
        mov     dword [edi], 'ref='
        lea     edi, [edi+4]
        mov     byte [edi], '"'
        lea     edi, [edi+1]

        push    esi
        mov     esi, edx
.copy_link:
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, $0d
        je      .end_link
        cmp     al, $0a
        je      .end_link
        test    al, al
        jz      .end_link
        mov     [edi], al
        lea     edi, [edi+1]
        jmp     .copy_link

.end_link:
        pop     esi
        mov     word [edi], '">'
        lea     edi, [edi+2]
        and     [.state], not fstateWhite
        inc     esi                             ; skip the '['; the label is output as normal text
        jmp     .scan_line

;....................................................................
.is_link_end:
        test    [.state], fstateLink
        jz      .normal_char
        mov     dword [edi], '</a>'
        lea     edi, [edi+4]
        and     [.state], not fstateLink
        jmp     .scan_line

;....................................................................
; processing header items and building of the table of contents.
.header:
        xor     ecx, ecx
        inc     esi
.scan_level:
        inc     ecx                             ; ecx = number of '#' characters
        mov     al, [esi]
        lea     esi, [esi+1]
        cmp     al, '#'
        je      .scan_level
        dec     esi
        cmp     ecx, 6
        jbe     @f
        mov     ecx, 6
@@:
        shl     ecx, 16                         ; moves the level to the position of the '0' character
        mov     [.last_header], ecx
        add     ecx, '<h0 '
        mov     [edi], ecx
        mov     dword [edi+4], 'id="'
        lea     edi, [edi+8]

; create ID: four base-26 digits of the header number, least significant first
        mov     eax, [.header_id]
        mov     ecx, 26
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString], dl
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString+1], dl
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString+2], dl
        xor     edx, edx
        div     ecx
        add     dl, 'a'
        mov     byte [.idString+3], dl
        inc     [.header_id]

; insert the id here.
        pushd   [.idString]
        popd    [edi]
        mov     word [edi+4], '">'
        lea     edi, [edi+6]

.end_header:
        or      [.state], fstateHeader
        jmp     .scan_line

; skip the whole line (lines starting with '[')
.skip:
        mov     al, [esi]
        lea     esi, [esi+1]
        test    al, al
        jz      .finish
        cmp     al, $0d
        je      .skip_eol
        cmp     al, $0a
        jne     .skip
.skip_eol:
        xor     al, $0d xor $0a
        cmp     [esi], al
        jne     .line_start
        inc     esi
        jmp     .line_start
endp
; GetAddressSpace - calls VirtualAlloc with address 0, the size taken from eax,
; allocation type MEM_RESERVE and protection PAGE_READWRITE.
; In:   eax = value passed as the size argument
; Out:  whatever VirtualAlloc returns (not examined here)
proc GetAddressSpace
begin
invoke VirtualAlloc, 0, eax, MEM_RESERVE, PAGE_READWRITE
return
endp
; CommitMemory
; NOTE(review): this procedure is unfinished - the VirtualAlloc invocation below
; has no arguments after the comma and there is no "return" before endp.
; The intended arguments cannot be determined from this file; do not call it
; until the body is completed.
proc CommitMemory
begin
invoke VirtualAlloc,
endp
endmodule