Replace raw UTF8 parser with String parser (#6)

* Replace UTF8 parser with String-based one

* swift format

* Add parsing errors that include context about where the error occurred

* Remove old error tests
This commit is contained in:
Adam Fowler
2021-03-18 17:26:42 +00:00
committed by GitHub
parent a602593b5d
commit 05740bd7bc
8 changed files with 421 additions and 622 deletions

View File

@@ -1,6 +1,11 @@
extension HBMustacheTemplate {
enum Error: Swift.Error {
/// Error that pairs an underlying parsing error with parser context.
public struct ParserError: Swift.Error {
/// Context obtained from the parser when the error was caught.
public let context: HBParser.Context
/// The underlying error that was thrown while parsing.
public let error: Swift.Error
}
public enum Error: Swift.Error {
case sectionCloseNameIncorrect
case unfinishedName
case expectedSectionEnd
@@ -46,23 +51,27 @@ extension HBMustacheTemplate {
/// Parse mustache text to generate a list of tokens.
///
/// - Parameter string: Mustache template text to parse.
/// - Returns: The tokens produced from `string`.
/// - Throws: `ParserError` wrapping whatever error the section parser threw,
///   together with the context reported by the parser when it was caught.
static func parse(_ string: String) throws -> [Token] {
    var parser = HBParser(string)
    do {
        return try parse(&parser, state: .init())
    } catch {
        // Attach the parser context so callers can see where parsing stopped.
        throw ParserError(context: parser.getContext(), error: error)
    }
}
/// parse section in mustache text
static func parse(_ parser: inout HBParser, state: ParserState) throws -> [Token] {
var tokens: [Token] = []
var state = state
var whiteSpaceBefore: String = ""
var whiteSpaceBefore: Substring = ""
while !parser.reachedEnd() {
// if new line read whitespace
if state.newLine {
whiteSpaceBefore = parser.read(while: Set(" \t")).string
whiteSpaceBefore = parser.read(while: Set(" \t"))
}
let text = try readUntilDelimiterOrNewline(&parser, state: state)
// if we hit a newline add text
if parser.current() == "\n" {
tokens.append(.text(whiteSpaceBefore + text + "\n"))
if parser.current().isNewline {
tokens.append(.text(whiteSpaceBefore + text + String(parser.current())))
state.newLine = true
parser.unsafeAdvance()
continue
@@ -87,7 +96,7 @@ extension HBMustacheTemplate {
if isStandalone(&parser, state: state) {
setNewLine = true
} else if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
let sectionTokens = try parse(&parser, state: state.withSectionName(name, method: method))
@@ -100,7 +109,7 @@ extension HBMustacheTemplate {
if isStandalone(&parser, state: state) {
setNewLine = true
} else if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
let sectionTokens = try parse(&parser, state: state.withSectionName(name, method: method))
@@ -109,14 +118,16 @@ extension HBMustacheTemplate {
case "/":
// end of section
parser.unsafeAdvance()
let position = parser.position
let (name, method) = try parseName(&parser, state: state)
guard name == state.sectionName, method == state.sectionMethod else {
parser.unsafeSetPosition(position)
throw Error.sectionCloseNameIncorrect
}
if isStandalone(&parser, state: state) {
setNewLine = true
} else if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
return tokens
@@ -130,7 +141,7 @@ extension HBMustacheTemplate {
case "{":
// unescaped variable
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
parser.unsafeAdvance()
@@ -141,7 +152,7 @@ extension HBMustacheTemplate {
case "&":
// unescaped variable
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
parser.unsafeAdvance()
@@ -153,11 +164,11 @@ extension HBMustacheTemplate {
parser.unsafeAdvance()
let (name, _) = try parseName(&parser, state: state)
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
}
if isStandalone(&parser, state: state) {
setNewLine = true
tokens.append(.partial(name, indentation: whiteSpaceBefore))
tokens.append(.partial(name, indentation: String(whiteSpaceBefore)))
} else {
tokens.append(.partial(name, indentation: nil))
}
@@ -172,7 +183,7 @@ extension HBMustacheTemplate {
default:
// variable
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
let (name, method) = try parseName(&parser, state: state)
@@ -189,24 +200,23 @@ extension HBMustacheTemplate {
/// read until we hit either the start delimiter of a tag or a newline
static func readUntilDelimiterOrNewline(_ parser: inout HBParser, state: ParserState) throws -> String {
var untilSet = Set("\n")
guard let delimiterFirstChar = state.startDelimiter.first,
let delimiterFirstScalar = delimiterFirstChar.unicodeScalars.first else { return "" }
var untilSet: Set<Character> = ["\n", "\r\n"]
guard let delimiterFirstChar = state.startDelimiter.first else { return "" }
var totalText = ""
untilSet.insert(delimiterFirstScalar)
untilSet.insert(delimiterFirstChar)
while !parser.reachedEnd() {
// read until we hit either a newline or "{"
let text = try parser.read(until: untilSet, throwOnOverflow: false).string
let text = try parser.read(until: untilSet, throwOnOverflow: false)
totalText += text
// if new line append all text read plus newline
if parser.current() == "\n" {
if parser.current().isNewline {
break
} else if parser.current() == delimiterFirstScalar {
if try parser.read(state.startDelimiter) {
} else if parser.current() == delimiterFirstChar {
if try parser.read(string: state.startDelimiter) {
break
}
totalText += String(delimiterFirstScalar)
totalText += String(delimiterFirstChar)
parser.unsafeAdvance()
}
}
@@ -216,59 +226,67 @@ extension HBMustacheTemplate {
/// Parse a variable name, optionally written as a method call `method(name)`,
/// followed by the current end delimiter.
///
/// - Parameters:
///   - parser: Parser positioned just before the name; on success it has consumed
///     the name, surrounding whitespace and the end delimiter.
///   - state: Current parser state, supplying the end delimiter.
/// - Returns: `(name, nil)` for a plain name, or `(name, method)` when the text has
///   the form `method(name)`.
/// - Throws: `Error.unfinishedName` if the end delimiter does not follow the name or
///   the brackets are malformed.
static func parseName(_ parser: inout HBParser, state: ParserState) throws -> (String, String?) {
    parser.read(while: \.isWhitespace)
    let text = String(parser.read(while: sectionNameChars))
    parser.read(while: \.isWhitespace)
    guard try parser.read(string: state.endDelimiter) else { throw Error.unfinishedName }

    // does the name include brackets. If so this is a method call
    var nameParser = HBParser(text)
    let string = nameParser.read(while: sectionNameCharsWithoutBrackets)
    if nameParser.reachedEnd() {
        return (text, nil)
    } else {
        // parse function parameter, as we have just parsed a function name
        guard nameParser.current() == "(" else { throw Error.unfinishedName }
        nameParser.unsafeAdvance()
        let string2 = nameParser.read(while: sectionNameCharsWithoutBrackets)
        guard nameParser.current() == ")" else { throw Error.unfinishedName }
        nameParser.unsafeAdvance()
        // nothing may follow the closing bracket
        guard nameParser.reachedEnd() else { throw Error.unfinishedName }
        return (String(string2), String(string))
    }
}
/// Read the body of a comment tag, up to the current end delimiter.
///
/// - Parameters:
///   - parser: Parser positioned at the start of the comment text.
///   - state: Current parser state, supplying the end delimiter.
/// - Returns: The text read by the parser, converted to a `String`.
/// - Throws: Whatever `HBParser.read(untilString:throwOnOverflow:skipToEnd:)` throws
///   (presumably when the end delimiter is never found — confirm against HBParser).
static func parseComment(_ parser: inout HBParser, state: ParserState) throws -> String {
    let text = try parser.read(untilString: state.endDelimiter, throwOnOverflow: true, skipToEnd: true)
    return String(text)
}
/// Parse a set-delimiter tag body of the form `<start> <end>=` followed by the
/// current end delimiter, and return a state using the new delimiters.
///
/// - Parameters:
///   - parser: Parser positioned at the start of the tag body.
///   - state: Current parser state, supplying the end delimiter that closes this tag.
/// - Returns: A copy of `state` produced by `withDelimiters(start:end:)`.
/// - Throws: `Error.invalidSetDelimiter` if reading either delimiter fails, the `=`
///   or closing delimiter is missing, or either new delimiter is empty.
static func parserSetDelimiter(_ parser: inout HBParser, state: ParserState) throws -> ParserState {
    let startDelimiter: Substring
    let endDelimiter: Substring

    do {
        parser.read(while: \.isWhitespace)
        startDelimiter = try parser.read(until: \.isWhitespace)
        parser.read(while: \.isWhitespace)
        endDelimiter = try parser.read(until: { $0 == "=" || $0.isWhitespace })
        parser.read(while: \.isWhitespace)
    } catch {
        // Any failure while reading the delimiters is reported uniformly.
        throw Error.invalidSetDelimiter
    }
    guard try parser.read("=") else { throw Error.invalidSetDelimiter }
    guard try parser.read(string: state.endDelimiter) else { throw Error.invalidSetDelimiter }
    guard !startDelimiter.isEmpty, !endDelimiter.isEmpty else { throw Error.invalidSetDelimiter }
    return state.withDelimiters(start: String(startDelimiter), end: String(endDelimiter))
}
/// Check whether the rest of the current line is blank.
///
/// Returns `true` when the parser is already at the end, or when only spaces and
/// tabs precede a newline character; in the latter case `parser` is moved to just
/// after that newline. Otherwise returns `false` and `parser` is not moved.
///
/// - Parameter parser: Parser to inspect and, on success, advance.
static func hasLineFinished(_ parser: inout HBParser) -> Bool {
    if parser.reachedEnd() { return true }

    // Look ahead on a copy so the real parser only moves when the line is blank
    // (assumes HBParser has value semantics — confirm against its declaration).
    var parser2 = parser
    parser2.read(while: Set(" \t"))
    if parser2.current().isNewline {
        parser2.unsafeAdvance()
        // The position comes from a copy of this same parser, so setPosition is
        // presumably unable to fail here.
        try! parser.setPosition(parser2.position)
        return true
    }
    return false
}
/// Check whether the tag just parsed stands alone on its line.
///
/// True when `state.newLine` is set and `hasLineFinished` reports that the rest of
/// the line is blank. Because `hasLineFinished` is only evaluated when
/// `state.newLine` is true, the parser is only advanced in that case.
///
/// - Parameters:
///   - parser: Parser positioned just after the tag.
///   - state: Current parser state.
static func isStandalone(_ parser: inout HBParser, state: ParserState) -> Bool {
    return state.newLine && hasLineFinished(&parser)
}
/// Characters allowed in a name, excluding the brackets used for method calls.
private static let sectionNameCharsWithoutBrackets = Set<Character>("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?")
/// Characters allowed in a name, including the brackets used for method calls.
private static let sectionNameChars = Set<Character>("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?()")
}