#### libremiliacr
#### Copyright(C) 2020-2024 Remilia Scarlet <remilia@posteo.jp>
####
#### This program is free software: you can redistribute it and/or modify it
#### under the terms of the GNU General Public License as published by the Free
#### Software Foundation, either version 3 of the License, or (at your option)
#### any later version.
####
#### This program is distributed in the hope that it will be useful, but WITHOUT
#### ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#### FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
#### more details.
####
#### You should have received a copy of the GNU General Public License along
#### with this program.  If not, see <http://www.gnu.org/licenses/>.
require "./common"
module RemiLib::RSConf
# The `Parser` class is used to parse RSConf data into `RSValue`s.
#
# ```crystal
# require "libremiliacr"
#
# rsconfStr = %|
# foo: 69
# bar: "hello, world"|
#
# parser = RemiLib::RSConf::Parser.new(rsconfStr)
# pp parser.parse
# ```
class Parser
# The stream that RSConf data is read from.
@stream : IO
# Current line counter, starting at 1.  Incremented by `#advLine` and reported
# in parse errors.
@line : UInt64 = 1
# Current column counter.  Incremented by `#adv`, reset by `#advLine`, and
# reported in parse errors.
@col : UInt64 = 0
# :nodoc:
#
# Raises an `RSConfParseError` built from the parser's current `@line` and
# `@col`.  The error message is *msg* prefixed with a "Near (line, col): "
# location note.
macro raiseParseError(msg)
raise RSConfParseError.new(@line, @col, "Near (#{@line}, #{@col}): #{ {{msg}} }")
end
# :nodoc:
#
# Raises a parse error reporting that *char* was found in the input.  This is
# meant to be called with a Return (`\r`) or Page Break (`\f`) character: any
# character other than `\f` is described as a Return.  The message contains the
# character's code point in hexadecimal, its escaped short name, and its long
# name.
macro raiseReturnOrPageFound(char)
  %code = sprintf("%02X", {{char}}.ord)
  %shortName = {{char}} == '\f' ? "\\f" : "\\r"
  %name = {{char}} == '\f' ? "Page break" : "Return"
  # The parenthesised description is closed properly so the message reads
  # e.g. "Illegal character: 0x0D (\r, Return)".
  raiseParseError("Illegal character: 0x#{%code} (#{%shortName}, #{%name})")
end
# :nodoc:
#
# Raises a parse error reporting that *char* is an illegal whitespace
# character.  The message contains the character's code point in hexadecimal.
macro raiseIllegalWhitespace(char)
%code = sprintf("%02X", {{char}}.ord)
raiseParseError("Illegal whitespace character: 0x#{%code}")
end
# Builds a new `Parser` that consumes RSConf data from *stream*.
#
# Raises an `RSConfEncodingError` if the stream reports any encoding other
# than UTF-8.
def initialize(@stream : IO)
  if @stream.encoding != "UTF-8"
    raise RSConfEncodingError.new("Unsupported stream encoding: #{@stream.encoding}")
  end
end
# Creates a new `Parser` that will read from *string*.  The string is wrapped
# in an `IO::Memory`, which then serves as the parser's stream.
def initialize(string : String)
@stream = IO::Memory.new(string)
end
# Parses RSConf data from this parser's stream and returns the resulting
# toplevel value.  All of the work is delegated to `#readDocument`.
def parse : RSTopLevel
readDocument
end
# Opens the file at *filename* for binary reading, parses its contents as
# RSConf data, and returns the toplevel value.  The file is closed when the
# block finishes.
def self.parse(filename : Path) : RSTopLevel
  File.open(filename, "rb") { |input| Parser.parse(input) }
end
# Parses the RSConf data held in *str* and returns the toplevel value.  The
# string is wrapped in an `IO::Memory` and handed to the `IO` overload.
def self.parse(str : String) : RSTopLevel
  Parser.parse(IO::Memory.new(str))
end
# Parses RSConf data read from *io* and returns the toplevel value.  This is a
# convenience for creating a `Parser` and immediately calling `#parse` on it.
def self.parse(io : IO) : RSTopLevel
  parser = Parser.new(io)
  parser.parse
end
############################################################################
# Advances the position tracking by one line: increments the line counter and
# resets the column counter.
#
# The column is reset to 0, the same value `@col` starts with, because `#adv`
# increments the column after each character is consumed.  Resetting to
# anything else would make columns on later lines disagree with columns on the
# first line.
@[AlwaysInline]
protected def advLine : Nil
  @line += 1
  @col = 0
end
# Advances one column by incrementing the column counter.  The line counter is
# left untouched.
@[AlwaysInline]
protected def adv : Nil
@col += 1
end
# Returns `true` if *char* is a Space or a Tab, and `false` for everything
# else (including Newline).
@[AlwaysInline]
protected def whitespaceCharButNotNewline(char : Char) : Bool
  char == ' ' || char == '\t'
end
# Returns `true` if *char* is a Space, a Tab, or a Newline, and `false` for
# everything else.
@[AlwaysInline]
protected def whitespaceChar(char : Char) : Bool
  case char
  when ' ', '\t', '\n'
    true
  else
    false
  end
end
# Returns `true` if the code point of *char* is listed in `ILLEGAL_WHITESPACE`
# (defined outside of this class).
@[AlwaysInline]
protected def illegalWhitespaceChar(char : Char) : Bool
ILLEGAL_WHITESPACE.includes?(char.ord)
end
# Consumes and returns the next character from the stream, or `nil` at the end
# of the stream.
#
# A Return (`\r`) or Page Break (`\f`) raises a parse error.  Otherwise the
# position tracking is updated: a Newline advances the line, any other
# character advances the column.
@[AlwaysInline]
protected def readChar : Char?
  ch = @stream.read_char
  if ch
    if ch == '\r' || ch == '\f'
      raiseReturnOrPageFound(ch)
    end
    ch == '\n' ? advLine : adv
  end
  ch
end
# Returns the next character in the stream, or `nil` at the end of the stream.
# The read happens inside `IO#withExcursion`, and the line/column counters are
# not modified.
#
# NOTE(review): `withExcursion` is defined outside this file; presumably it
# restores the stream position after the block -- confirm.
@[AlwaysInline]
protected def peekChar : Char?
  peeked : Char? = nil
  @stream.withExcursion { peeked = @stream.read_char }
  peeked
end
# Consumes and returns the next character from the stream, or `nil` at the end
# of the stream.  Unlike `#readChar`, this does *not* reject Return or Page
# Break characters; it only updates the line/column tracking.
@[AlwaysInline]
protected def readChar! : Char?
  ch = @stream.read_char
  if ch == '\n'
    advLine
  elsif ch
    adv
  end
  ch
end
# Consumes characters for as long as the next one is a Space, Tab, or Newline.
# Stops (without consuming) at the end of the stream or at the first character
# that is not whitespace.
#
# Raises a parse error if a Return (`\r`), a Page Break (`\f`), or a character
# listed as illegal whitespace is encountered.  The Return/Page Break check is
# its own branch, as in `#skipSpaces`: nested inside the `whitespaceChar`
# branch it could never run, because that predicate only matches Space, Tab,
# and Newline.
@[AlwaysInline]
protected def skipWhitespace : Nil
  loop do
    c = peekChar
    if c.nil?
      break
    elsif whitespaceChar(c)
      readChar
    elsif c == '\r' || c == '\f'
      raiseReturnOrPageFound(c)
    elsif illegalWhitespaceChar(c)
      raiseIllegalWhitespace(c)
    else
      break
    end
  end
end
# Consumes characters for as long as the next one is a Space or a Tab.
# Newlines are *not* skipped.  Stops (without consuming) at the end of the
# stream or at the first character that is neither.
#
# Raises a parse error if a Return (`\r`), a Page Break (`\f`), or a character
# listed as illegal whitespace is encountered.
@[AlwaysInline]
protected def skipSpaces : Nil
  while ch = peekChar
    case
    when whitespaceCharButNotNewline(ch)
      readChar
    when ch == '\r' || ch == '\f'
      raiseReturnOrPageFound(ch)
    when illegalWhitespaceChar(ch)
      raiseIllegalWhitespace(ch)
    else
      break
    end
  end
end
# Consumes the remainder of a comment: everything up to and including the next
# Newline, or up to the end of the stream.
#
# Return and Page Break characters raise a parse error.  `#readChar` already
# raises for these, so the explicit check here is only a second line of
# defence.
@[AlwaysInline]
protected def readComment : Nil
  while ch = readChar
    break if ch == '\n'
    if ch == '\r' || ch == '\f'
      raiseReturnOrPageFound(ch)
    end
  end
end
# Consumes any run of whitespace (Space, Tab, Newline) and `;` comments.
# Stops (without consuming) at the end of the stream or at the first character
# that is neither whitespace nor the start of a comment.
#
# Newlines are skipped as well as Spaces and Tabs.  Without that, a blank line
# following a comment would stop the skipping early, and the caller would see
# the Newline as unexpected content.
#
# Raises a parse error if a Return (`\r`), a Page Break (`\f`), or a character
# listed as illegal whitespace is encountered.
@[AlwaysInline]
protected def skipWhitespaceAndComments : Nil
  loop do
    c = peekChar
    if c.nil?
      break
    elsif whitespaceChar(c)
      # #readChar keeps the line/column tracking correct for Newlines.
      readChar
    elsif c == '\r' || c == '\f'
      raiseReturnOrPageFound(c)
    elsif illegalWhitespaceChar(c)
      raiseIllegalWhitespace(c)
    elsif c == ';'
      readComment
    else
      break
    end
  end
end
# Reads an integer or a float from the stream and returns it wrapped in an
# `RSScalar`.
#
# A number starts with either a digit, a sign (`+`/`-`), or a radix prefix
# (`#x` for hexadecimal, `#o` for octal, `#b` for binary; case-insensitive).
# Floats are recognized by a `.` or by an exponent marker (`e` or `d`,
# case-insensitive) and must be decimal.  Reading stops at the end of the
# stream, at whitespace (which is consumed), or at `,`, `]`, or `}` (which are
# left in the stream).
#
# Raises a parse error for malformed numbers, illegal whitespace, or any
# character that cannot be part of a number.
protected def readNumber : RSScalar
  radix : UInt8 = 10 # Assume decimal until determined otherwise
  float? : Bool = false
  expChar : Char = 'e'
  haveExpChar? : Bool = false
  first? : Bool = true
  ret : Int64|Float64 = 0

  # We'll build the number into a string, then convert it later.
  #
  # TODO this could probably be done more efficiently without the string?
  retStr = String.build do |str|
    c = peekChar || raise "Character was not expected to be Nil"
    RemiLib.assert(c != '\r')
    RemiLib.assert(c != '\f')

    # We now need to see if we're starting off with a special radix (e.g. #x
    # or #b), or a digit.
    if c == '#'
      readChar
      c = peekChar || raiseParseError("Unexpected end of stream")

      # We've handled Nil, so now figure out the radix.  The code below
      # calls #readChar which will handle #\Return and #\Page for us.
      case c.downcase
      when 'x'
        readChar
        radix = 16
      when 'o'
        readChar
        radix = 8
      when 'b'
        readChar
        radix = 2
      else
        raiseParseError("Invalid radix character: '#{c}'")
      end
      first? = false
    elsif c.to_i?(radix) || c == '+' || c == '-'
      readChar
      str << c
      first? = false
    else
      # Anything else is an error
      raiseParseError("Expected a digit or the start of a radix")
    end

    # Now read characters.  The #readChar will automatically check for
    # #\Return and #\Page as we go.
    loop do
      c = peekChar
      if c.nil? || whitespaceChar(c)
        readChar
        break
      elsif illegalWhitespaceChar(c)
        raiseIllegalWhitespace(c)
      elsif c == ',' || c == ']' || c == '}'
        break
      elsif c.to_i?(radix)
        # Note that for hexadecimal numbers this branch also covers 'e' and
        # 'd', so they are treated as digits rather than exponent markers.
        readChar
        str << c
      elsif c == '.'
        readChar

        # Switch to float mode, or error if we're already reading floats or
        # are using the wrong radix.
        if float?
          raiseParseError("Unexpected extra period in float")
        elsif radix != 10
          raiseParseError("Floats must be in decimal")
        else
          float? = true
        end

        str << c
      elsif c.downcase == 'e' || c.downcase == 'd'
        # An exponent marker makes the number a float, and floats must be in
        # decimal.  Without this check a binary or octal number such as
        # "#b1e5" would silently be parsed as the decimal float 1e5.
        if radix != 10
          raiseParseError("Floats must be in decimal")
        end

        expChar = c.downcase
        if float?
          if haveExpChar?
            raiseParseError("Unexpected character while reading float: '#{c}'")
          else
            haveExpChar? = true
          end
        else
          if haveExpChar?
            raiseParseError("Unexpected character while reading integer: '#{c}'")
          else
            haveExpChar? = true
            float? = true
          end
        end

        # We downcase it here in case it's an #\e.  This gets around having
        # to do a String#downcase later on when we parse for a float.
        readChar
        str << c.downcase
      elsif c == '-' || c == '+'
        # A sign is only valid here directly as part of a float's exponent.
        if float?
          if !haveExpChar?
            raiseParseError("Unexpected sign character in float number: '#{c}'")
          end
        elsif !first?
          raiseParseError("Unexpected sign character in integer: '#{c}'")
        end

        readChar
        str << c
      else
        # Everything else is an error
        raiseParseError("Bad numeric character: '#{c}'")
      end # if c.nil? || whitespaceChar(c)

      first? = false
    end # loop do
  end

  # Now try to parse the number.  If we hit an ArgumentError, raise our own
  # parsing error instead, though this should never happen.
  begin
    ret = if float?
            RemiLib.assert(expChar == 'e' || expChar == 'd')
            # Replace 'd' with 'e' because we aren't in Common Lisp.
            # ameba:disable Lint/UselessAssign
            retStr.gsub('d', 'e').to_f64(whitespace = false)
          else
            retStr.to_i64(radix)
          end
  rescue ArgumentError
    raiseParseError("Could not parse numeric value")
  end

  RSScalar.new(ret)
end
# Reads the hexadecimal code of an escaped character inside of a string, up to
# and including the closing `}`, and returns the corresponding `Char`.  The
# caller is expected to have already consumed the opening `\u{`.
#
# *startLine* and *startCol* are the position where the enclosing string
# started; they are only used in error messages.
#
# Raises a parse error if the stream ends before the `}`, if a non-hexadecimal
# character is found, or if the collected digits cannot be converted into a
# character.
protected def readEscapedUTF8Char(startLine : UInt64, startCol : UInt64) : Char
  hexDigits = String.build do |digits|
    loop do
      ch = readChar
      if ch.nil?
        raiseParseError("Unterminated UTF-8 character in the string starting at " \
                        "line #{startLine} column #{startCol}")
      elsif ch.to_i?(16) # Always in hex according to the spec
        digits << ch
      elsif ch == '}'
        break
      else
        raiseParseError("Invalid UTF-8 code in the string starting at line #{startLine} column #{startCol}")
      end
    end
  end

  begin
    hexDigits.to_i32(16).chr
  rescue err : ArgumentError
    raiseParseError("Could not parse escaped UTF-8 character in string starting at " \
                    "line #{startLine} column #{startCol}: #{err}")
  end
end
# Reads a double-quoted string from the stream and returns its contents
# wrapped in an `RSScalar`.  The next character in the stream must be the
# opening `"`.
#
# Recognized escapes are `\"` (a literal quote), `\\` (a literal backslash),
# and `\u{XXXX}` (a character given by its hexadecimal code).  A `\u` that is
# not followed by `{` is written out literally as `\u`.
#
# The string body is read with `#readChar!`, so Return and Page Break
# characters are not rejected inside of strings.
#
# Raises a parse error if the stream ends before the closing `"`.
protected def readString : RSScalar
  startLine : UInt64 = @line
  startCol : UInt64 = @col
  backslash? : Bool = false

  RSScalar.new(
    String.build do |str|
      # This must be true since the caller should only have peeked at the next
      # character.
      raise "Unexpectedly tried to read a string" unless readChar == '"'

      loop do
        c = readChar!
        case
        when c.nil?
          raiseParseError("Unterminated string starting at line #{startLine} column #{startCol} "\
                          "(end of stream reached)")
        when c == '"'
          if backslash?
            str << c
            backslash? = false
          else
            break
          end
        when c == '\\'
          if backslash?
            # Write one backslash
            str << c
            backslash? = false
          else
            backslash? = true
          end
        when c == 'u'
          if backslash?
            # Only peek at the next character.  If it isn't a '{' it belongs
            # to the rest of the string (it may even be the closing quote) and
            # must be left in the stream for the next iteration.
            if peekChar == '{'
              readChar # Consume the '{'
              str << readEscapedUTF8Char(startLine, startCol)
            else
              # Write both the backslash and the #\u
              str << '\\'
              str << c
            end
            backslash? = false
          else
            str << c
          end
        else
          # NOTE(review): a pending backslash before any other character is
          # dropped here and only the character itself is written -- confirm
          # that this matches the RSConf specification.
          str << c
          backslash? = false
        end
      end
    end)
end
# Reads a key name, including the `:` that terminates it, and returns the
# name.  Return and Page Break characters are rejected by the readers this
# calls.
#
# A key name is either a quoted string followed by optional spaces and a
# colon, or an unquoted run of characters up to the colon.  Unquoted names
# have surrounding whitespace stripped, and may not contain Newlines, quotes,
# braces, or brackets.
#
# Raises a parse error if the name is malformed, empty, or blank, or if the
# stream ends before the colon.
protected def readKeyName : String
  skipWhitespace

  name = if peekChar == '"'
           # Quoted key name: read it as a string, then expect the colon.
           quoted = readString.val.as(String)
           skipSpaces
           raiseParseError("Expected a colon after the key name") unless readChar == ':'
           quoted
         else
           # Unquoted key name: collect everything up to the colon.
           raw = String.build do |buf|
             loop do
               ch = readChar
               if ch.nil?
                 raiseParseError("Unexpected end of stream")
               elsif ch == ':'
                 break
               elsif ch == '"' || ch == '{' || ch == '}' || ch == '[' || ch == ']'
                 raiseParseError("Invalid character in unquoted key name: '#{ch}'")
               else
                 buf << ch
               end
             end
           end

           raiseParseError("Unquoted key names cannot contain newlines") if raw.includes?('\n')
           raw.strip
         end

  # String#blank? is also true for the empty string.
  raiseParseError("Empty key name") if name.blank?
  name
end
# Reads the word `true` or `false` from the stream and returns the matching
# boolean wrapped in an `RSScalar`.
#
# The whole word is matched case-insensitively, including its first letter.
# This matches `#readValue`, which dispatches here for an upper- or lowercase
# `t`/`f`.
#
# Raises a parse error if the stream ends in the middle of the word, or if the
# characters do not spell `true` or `false`.
protected def readBoolean : RSScalar
  firstChar = readChar || raise "Can't read bool, but expected to"

  case firstChar.downcase
  when 't'
    r = readChar || raiseParseError("Unexpected end of stream")
    u = readChar || raiseParseError("Unexpected end of stream")
    e = readChar || raiseParseError("Unexpected end of stream")
    if r.downcase == 'r' && u.downcase == 'u' && e.downcase == 'e'
      RSScalar.new(true)
    else
      raiseParseError("Bad boolean value")
    end
  when 'f'
    a = readChar || raiseParseError("Unexpected end of stream")
    l = readChar || raiseParseError("Unexpected end of stream")
    s = readChar || raiseParseError("Unexpected end of stream")
    e = readChar || raiseParseError("Unexpected end of stream")
    if a.downcase == 'a' && l.downcase == 'l' && s.downcase == 's' && e.downcase == 'e'
      RSScalar.new(false)
    else
      raiseParseError("Bad boolean value")
    end
  else
    raiseParseError("Unexpected character where a boolean was expected: '#{firstChar}'")
  end
end
# Reads the word `nil` (matched case-insensitively) from the stream and
# returns an `RSScalar` wrapping `nil`.
#
# Raises a parse error if the stream ends in the middle of the word, or if the
# three characters read do not spell `nil`.
protected def readNull : RSScalar
  first = readChar || raise "Can't read nil, but expected to"
  second = readChar || raiseParseError("Unexpected end of stream")
  third = readChar || raiseParseError("Unexpected end of stream")

  spelledNil = first.downcase == 'n' && second.downcase == 'i' && third.downcase == 'l'
  raiseParseError("Bad null value") unless spelledNil

  RSScalar.new(nil)
end
# Skips spaces, consumes a single `,` if one comes next, and then skips any
# whitespace (including Newlines) that follows.
@[AlwaysInline]
protected def maybeReadComma : Nil
  skipSpaces
  if peekChar == ','
    readChar
  end
  skipWhitespace
end
# Reads a single value (number, object, array, string, boolean, or null) from
# the stream and returns it.  Leading whitespace and comments are skipped, and
# an optional trailing comma plus any whitespace after the value are consumed.
#
# The kind of value is chosen from its first character: a digit, `#`, `+`, or
# `-` starts a number; `{` an object; `[` an array; `"` a string; `t`/`f` (any
# case) a boolean; and `n` (any case) a null.
#
# Raises a parse error if the stream ends before a value is found, or if the
# first character cannot start a value.
protected def readValue : RSValue?
  skipWhitespace

  # Skip over any comments (and the whitespace following each one) that come
  # before the value.
  c = peekChar
  while c == ';'
    readComment
    skipWhitespace
    c = peekChar
  end
  raiseParseError("Expected a value") if c.nil?

  value = if c.to_i?(10) || c == '#' || c == '+' || c == '-'
            readNumber
          elsif c == '{'
            readObject
          elsif c == '['
            readArray
          elsif c == '"'
            readString
          elsif c.downcase == 't' || c.downcase == 'f'
            readBoolean
          elsif c.downcase == 'n'
            readNull
          else
            raiseParseError("Unexpected character where a value was expected: '#{c}'")
          end

  maybeReadComma
  skipWhitespace
  value
end
# Reads an object (a `{ ... }` delimited set of key/value pairs) from the
# stream and returns it.  The next character in the stream must be the opening
# `{`.  An optional comma after the closing `}` is consumed as well.
#
# Raises a parse error on illegal whitespace, on a `{`, `[`, or `]` where a key
# was expected, or on anything that the key and value readers reject.
private def readObject : RSObject
ret : RSObject = RSObject.new
raise "Expected start of hash" unless readChar == '{'
skipWhitespace
loop do
c = peekChar
case
# NOTE(review): the end of the stream is treated the same as a closing '}',
# so an unterminated object is accepted -- confirm that this is intended.
when c.nil? || c == '}'
readChar
maybeReadComma
break
when whitespaceChar(c)
readChar
when illegalWhitespaceChar(c)
raiseIllegalWhitespace(c)
when c == ';'
# Comment between entries
readComment
skipWhitespace
# Anything that is not a brace or bracket is taken as the start of a key name.
when c == '"' || !(c == '{' || c == '}' || c == '[' || c == ']')
key : String = readKeyName
val : RSValue|Nil = readValue
if val.nil?
raiseParseError("Unexpected end of stream")
else
ret[key] = val.as(RSValue)
end
else
raiseParseError("Expected a key or the end of an object")
end
end
ret
end
# Reads an array (a `[ ... ]` delimited list of values) from the stream and
# returns it.  The next character in the stream must be the opening `[`.  An
# optional comma after the closing `]` is consumed as well.
#
# Booleans and nulls are recognized by a first letter of `t`, `f`, or `n` in
# any case, the same way `#readValue` dispatches them, so values such as
# `True` or `Nil` are accepted here exactly as they are for object values.
#
# Raises a parse error on illegal whitespace, on a character that cannot start
# a value, or on anything that `#readValue` rejects.
private def readArray : RSArray
  ret : RSArray = RSArray.new
  raise "Expected start of array" unless readChar == '['
  skipWhitespace

  loop do
    c = peekChar
    case
    # NOTE(review): the end of the stream is treated the same as a closing
    # ']', so an unterminated array is accepted -- confirm this is intended.
    when c.nil? || c == ']'
      readChar
      maybeReadComma
      break
    when whitespaceChar(c)
      readChar
    when illegalWhitespaceChar(c)
      raiseIllegalWhitespace(c)
    when c == ';'
      readComment
      skipWhitespace
    when c == '"' || c == '{' || c == '}' || c == '[' || c == ']' ||
         c.to_i?(10) || c == '+' || c == '-' || c == '#' ||
         c.downcase == 't' || c.downcase == 'f' || c.downcase == 'n'
      val : RSValue|Nil = readValue
      if val.nil?
        raiseParseError("Unexpected end of stream")
      else
        ret << val.as(RSValue)
      end
      skipWhitespace
    else
      raiseParseError("Expected a value or the end of an array")
    end
  end

  ret
end
# Peeks at the first character of the stream and raises an `RSConfBOMError` if
# it looks like a Byte-Order-Mark or other junk.  Nothing is consumed.
#
# The list of rejected code points was determined by writing raw bytes into a
# byte stream, converting that to a string, and reading from the string.
# Since some of these characters would not be valid outside of a string
# anyway, the check doubles as an early "is the first character a bad one?"
# test.  This must run before any initial whitespace is skipped.
#
# https://en.wikipedia.org/wiki/Byte_order_mark
protected def testForBOM : Nil
  if first = peekChar
    case first.ord
    when 0xFEFF, # Zero width no-break space
         0xFFFD, # Replacement character
         0x2B,   # +
         0x0E    # So
      raise RSConfBOMError.new("Possible byte-order-mark detected, or junk in toplevel")
    end
  end
end
# Reads an entire RSConf document from the stream and returns its toplevel
# value.
#
# The toplevel is either an explicit object (`{ ... }`), an explicit array
# (`[ ... ]`), or a bare sequence of key/value pairs that are collected into an
# implicit object.  A document without any content yields an empty `RSObject`.
#
# Raises an `RSConfBOMError` (via `#testForBOM`) if the stream starts with a
# suspicious character, and a parse error for illegal whitespace, unexpected
# characters, or leftover content after the toplevel value.
private def readDocument : RSTopLevel
ret : RSObject|RSArray|Nil = nil
# Set once the first bare key/value pair has been seen at the toplevel.
startedObject : Bool = false
# The BOM test must happen before any whitespace is skipped.
testForBOM
skipWhitespace
loop do
c = peekChar
#puts "At #{@stream.pos} (#{@line}, #{@col}): #{c}"
case
when c.nil?
break
when whitespaceChar(c)
skipWhitespace
when illegalWhitespaceChar(c)
raiseIllegalWhitespace(c)
when c == ';'
readComment
when c == '{'
# An explicit object cannot follow bare key/value pairs.
raiseParseError("Unexpected start of document object") if startedObject
ret = readObject
break
when c == '['
# An explicit array cannot follow bare key/value pairs.
raiseParseError("Unexpected start of document array") if startedObject
ret = readArray
break
when c == '"' || c.alphanumeric?
# A bare key/value pair.  The first one creates the implicit toplevel object.
unless startedObject
startedObject = true
RemiLib.assert(ret.nil?)
ret = RSObject.new
end
RemiLib.assert(ret.is_a?(RSObject))
key : String = readKeyName
val : RSValue|Nil = readValue
if val.nil?
raiseParseError("Unexpected end of stream")
else
ret[key] = val.as(RSValue)
end
else
raiseParseError("Unexpected character at toplevel: '#{c}'")
end
end
# Whatever remains after the trailing skip is junk.
skipWhitespaceAndComments
raiseParseError("Unexpected junk at toplevel") unless readChar.nil?
if ret.nil?
RSObject.new
else
ret.as(RSObject|RSArray)
end
end
end
end
|