Replace raw UTF8 parser with String parser (#6)

* Replace UTF8 parser with String based one

* swift format

* Add parsing errors that include context about where the error occurred

* Remove old error tests
This commit is contained in:
Adam Fowler
2021-03-18 17:26:42 +00:00
committed by GitHub
parent a602593b5d
commit 05740bd7bc
8 changed files with 421 additions and 622 deletions

View File

@@ -17,8 +17,8 @@ extension HBMustacheLibrary {
let template: HBMustacheTemplate
do {
template = try HBMustacheTemplate(string: string)
} catch {
throw Error.failedToLoad(path, error)
} catch let error as HBMustacheTemplate.ParserError {
throw ParserError(filename: path, context: error.context, error: error.error)
}
// drop ".mustache" from path to get name
let name = String(path.dropLast(extWithDot.count))

View File

@@ -47,8 +47,14 @@ public final class HBMustacheLibrary {
return template.render(object)
}
public enum Error: Swift.Error {
case failedToLoad(String, Swift.Error)
/// Error returned by init() when parser fails
public struct ParserError: Swift.Error {
/// Name of the file the error occurred in
public let filename: String
/// Context (line, line number and column number) of the error
public let context: HBParser.Context
/// Actual error that occurred
public let error: Error
}
private var templates: [String: HBMustacheTemplate]

View File

@@ -1,100 +1,66 @@
// Parser.swift
//
// Half inspired by Reader class from John Sundell's Ink project
// https://github.com/JohnSundell/Ink/blob/master/Sources/Ink/Internal/Reader.swift
// with optimisation working ie removing String and doing my own UTF8 processing inspired by Fabian Fett's work in
// https://github.com/swift-extras/swift-extras-json/blob/main/Sources/ExtrasJSON/Parsing/DocumentReader.swift
//
// This is a copy of the parser from Hummingbird. I am not using the version in Hummingbird to avoid the dependency
import Foundation
/// Reader object for parsing String buffers
struct HBParser {
public struct HBParser {
enum Error: Swift.Error {
case overflow
case unexpected
case emptyString
case invalidUTF8
case invalidPosition
}
/// Create a Parser object
/// - Parameter string: UTF8 data to parse
init?<Bytes: Collection>(_ utf8Data: Bytes, validateUTF8: Bool = true) where Bytes.Element == UInt8 {
if let buffer = utf8Data as? [UInt8] {
/// internal storage used to store String
private class Storage {
init(_ buffer: String) {
self.buffer = buffer
} else {
buffer = Array(utf8Data)
}
index = 0
range = 0 ..< buffer.endIndex
// should check that the data is valid utf8
if validateUTF8 == true, self.validateUTF8() == false {
return nil
}
let buffer: String
}
private let _storage: Storage
/// Create a Reader object
/// - Parameter string: String to parse
init(_ string: String) {
buffer = Array(string.utf8)
index = 0
range = 0 ..< buffer.endIndex
_storage = Storage(string)
position = string.startIndex
}
/// Return contents of parser as a string
var count: Int {
return range.count
}
/// Return contents of parser as a string
var string: String {
return makeString(buffer[range])
}
private var buffer: [UInt8]
private var index: Int
private let range: Range<Int>
}
// MARK: sub-parsers
extension HBParser {
/// initialise a parser that parses a section of the buffer attached to another parser
init(_ parser: HBParser, range: Range<Int>) {
buffer = parser.buffer
index = range.startIndex
self.range = range
precondition(range.startIndex >= 0 && range.endIndex <= buffer.endIndex)
precondition(buffer[range.startIndex] & 0xC0 != 0x80) // check we arent in the middle of a UTF8 character
}
/// initialise a parser that parses a section of the buffer attached to this parser
func subParser(_ range: Range<Int>) -> HBParser {
if range.startIndex == range.endIndex {
return HBParser(self, range: self.range.startIndex ..< self.range.startIndex)
}
return HBParser(self, range: range)
}
var buffer: String { return _storage.buffer }
private(set) var position: String.Index
}
extension HBParser {
/// Return current character
/// - Throws: .overflow
/// - Returns: Current character
mutating func character() throws -> Unicode.Scalar {
guard !reachedEnd() else { throw Error.overflow }
return unsafeCurrentAndAdvance()
mutating func character() throws -> Character {
guard !reachedEnd() else { throw HBParser.Error.overflow }
let c = unsafeCurrent()
unsafeAdvance()
return c
}
/// Read the current character and return if it is as intended. If character test returns true then move forward 1
/// - Parameter char: character to compare against
/// - Throws: .overflow
/// - Returns: If current character was the one we expected
mutating func read(_ char: Unicode.Scalar) throws -> Bool {
let initialIndex = index
mutating func read(_ char: Character) throws -> Bool {
let c = try character()
guard c == char else { index = initialIndex; return false }
guard c == char else { unsafeRetreat(); return false }
return true
}
/// Compare the characters at the current position against the supplied string. If they are the
/// same, advance past them; otherwise leave the position where it was.
/// - Parameter string: String to compare against. An empty string always matches and consumes nothing
/// - Throws: .overflow if fewer than `string.count` characters remain in the buffer
/// - Returns: If the characters at the current position equal `string`
mutating func read(string: String) throws -> Bool {
    // isEmpty is O(1), whereas count has to walk every character of the string
    guard !string.isEmpty else { return true }
    let initialPosition = position
    let subString = try read(count: string.count)
    guard subString == string else {
        // rewind so that a failed comparison consumes nothing
        position = initialPosition
        return false
    }
    return true
}
@@ -102,22 +68,9 @@ extension HBParser {
/// - Parameter characterSet: Set of characters to compare against
/// - Throws: .overflow
/// - Returns: If current character is in character set
mutating func read(_ characterSet: Set<Unicode.Scalar>) throws -> Bool {
let initialIndex = index
mutating func read(_ characterSet: Set<Character>) throws -> Bool {
let c = try character()
guard characterSet.contains(c) else { index = initialIndex; return false }
return true
}
/// Compare characters at current position against provided string. If the characters are the same as string provided advance past string
/// - Parameter string: String to compare against
/// - Throws: .overflow, .emptyString
/// - Returns: If characters at current position equal string
mutating func read(_ string: String) throws -> Bool {
let initialIndex = index
guard string.count > 0 else { throw Error.emptyString }
let subString = try read(count: string.count)
guard subString.string == string else { index = initialIndex; return false }
guard characterSet.contains(c) else { unsafeRetreat(); return false }
return true
}
@@ -125,93 +78,31 @@ extension HBParser {
/// - Parameter count: Number of characters to read
/// - Throws: .overflow
/// - Returns: The string read from the buffer
mutating func read(count: Int) throws -> HBParser {
var count = count
var readEndIndex = index
while count > 0 {
guard readEndIndex != range.endIndex else { throw Error.overflow }
readEndIndex = skipUTF8Character(at: readEndIndex)
count -= 1
}
let result = subParser(index ..< readEndIndex)
index = readEndIndex
return result
mutating func read(count: Int) throws -> Substring {
guard buffer.distance(from: position, to: buffer.endIndex) >= count else { throw HBParser.Error.overflow }
let end = buffer.index(position, offsetBy: count)
let subString = buffer[position ..< end]
unsafeAdvance(by: count)
return subString
}
/// Read from buffer until we hit a character. Position after this is of the character we were checking for
/// - Parameter until: Unicode.Scalar to read until
/// - Parameter until: Character to read until
/// - Throws: .overflow if we hit the end of the buffer before reading character
/// - Returns: String read from buffer
@discardableResult mutating func read(until: Unicode.Scalar, throwOnOverflow: Bool = true) throws -> HBParser {
let startIndex = index
@discardableResult mutating func read(until: Character, throwOnOverflow: Bool = true) throws -> Substring {
let startIndex = position
while !reachedEnd() {
if unsafeCurrent() == until {
return subParser(startIndex ..< index)
return buffer[startIndex ..< position]
}
unsafeAdvance()
}
if throwOnOverflow {
_setPosition(startIndex)
throw Error.overflow
unsafeSetPosition(startIndex)
throw HBParser.Error.overflow
}
return subParser(startIndex ..< index)
}
/// Read from buffer until we hit a character in supplied set. Position after this is of the character we were checking for
/// - Parameter characterSet: Unicode.Scalar set to check against
/// - Throws: .overflow
/// - Returns: String read from buffer
@discardableResult mutating func read(until characterSet: Set<Unicode.Scalar>, throwOnOverflow: Bool = true) throws -> HBParser {
let startIndex = index
while !reachedEnd() {
if characterSet.contains(unsafeCurrent()) {
return subParser(startIndex ..< index)
}
unsafeAdvance()
}
if throwOnOverflow {
_setPosition(startIndex)
throw Error.overflow
}
return subParser(startIndex ..< index)
}
/// Read from buffer until we hit a character that returns true for supplied closure. Position after this is of the character we were checking for
/// - Parameter until: Function to test
/// - Throws: .overflow
/// - Returns: String read from buffer
@discardableResult mutating func read(until: (Unicode.Scalar) -> Bool, throwOnOverflow: Bool = true) throws -> HBParser {
let startIndex = index
while !reachedEnd() {
if until(unsafeCurrent()) {
return subParser(startIndex ..< index)
}
unsafeAdvance()
}
if throwOnOverflow {
_setPosition(startIndex)
throw Error.overflow
}
return subParser(startIndex ..< index)
}
/// Read from buffer until we hit a character where supplied KeyPath is true. Position after this is of the character we were checking for
/// - Parameter characterSet: Unicode.Scalar set to check against
/// - Throws: .overflow
/// - Returns: String read from buffer
@discardableResult mutating func read(until keyPath: KeyPath<Unicode.Scalar, Bool>, throwOnOverflow: Bool = true) throws -> HBParser {
let startIndex = index
while !reachedEnd() {
if unsafeCurrent()[keyPath: keyPath] {
return subParser(startIndex ..< index)
}
unsafeAdvance()
}
if throwOnOverflow {
_setPosition(startIndex)
throw Error.overflow
}
return subParser(startIndex ..< index)
return buffer[startIndex ..< position]
}
/// Read from buffer until we hit a string. By default the position after this is of the beginning of the string we were checking for
@@ -220,52 +111,106 @@ extension HBParser {
/// - Parameter skipToEnd: Should we set the position to after the found string
/// - Throws: .overflow, .emptyString
/// - Returns: String read from buffer
@discardableResult mutating func read(untilString: String, throwOnOverflow: Bool = true, skipToEnd: Bool = false) throws -> HBParser {
var untilString = untilString
return try untilString.withUTF8 { utf8 in
guard utf8.count > 0 else { throw Error.emptyString }
let startIndex = index
var foundIndex = index
var untilIndex = 0
while !reachedEnd() {
if buffer[index] == utf8[untilIndex] {
if untilIndex == 0 {
foundIndex = index
}
untilIndex += 1
if untilIndex == utf8.endIndex {
unsafeAdvance()
if skipToEnd == false {
index = foundIndex
}
let result = subParser(startIndex ..< foundIndex)
return result
}
} else {
untilIndex = 0
@discardableResult mutating func read(untilString: String, throwOnOverflow: Bool = true, skipToEnd: Bool = false) throws -> Substring {
guard untilString.count > 0 else { return "" }
let startIndex = position
var foundIndex = position
var untilIndex = untilString.startIndex
while !reachedEnd() {
if unsafeCurrent() == untilString[untilIndex] {
if untilIndex == untilString.startIndex {
foundIndex = position
}
index += 1
untilIndex = untilString.index(after: untilIndex)
if untilIndex == untilString.endIndex {
unsafeAdvance()
if skipToEnd == false {
position = foundIndex
}
let result = buffer[startIndex ..< foundIndex]
return result
}
} else {
untilIndex = untilString.startIndex
}
if throwOnOverflow {
_setPosition(startIndex)
throw Error.overflow
}
return subParser(startIndex ..< index)
unsafeAdvance()
}
if throwOnOverflow {
position = startIndex
throw Error.overflow
}
return buffer[startIndex ..< position]
}
/// Consume characters until one that belongs to the supplied set is found. On a match the parser
/// is left positioned on the matching character, which is not included in the result.
/// - Parameter characterSet: Characters that terminate the read
/// - Parameter throwOnOverflow: If true, hitting the end of the buffer without a match restores the
///   starting position and throws; if false, everything up to the end of the buffer is returned
/// - Throws: .overflow
/// - Returns: Text between the starting position and the match (or the end of the buffer)
@discardableResult mutating func read(until characterSet: Set<Character>, throwOnOverflow: Bool = true) throws -> Substring {
    let origin = position
    while !reachedEnd(), !characterSet.contains(unsafeCurrent()) {
        unsafeAdvance()
    }
    // the loop stops either on a matching character or at the end of the buffer
    if reachedEnd(), throwOnOverflow {
        unsafeSetPosition(origin)
        throw HBParser.Error.overflow
    }
    return buffer[origin ..< position]
}
/// Consume characters until one for which the supplied key path evaluates to true is found. On a
/// match the parser is left positioned on the matching character, which is not included in the result.
/// - Parameter keyPath: Boolean property of `Character` that terminates the read when true
/// - Parameter throwOnOverflow: If true, hitting the end of the buffer without a match restores the
///   starting position and throws; if false, everything up to the end of the buffer is returned
/// - Throws: .overflow
/// - Returns: Text between the starting position and the match (or the end of the buffer)
@discardableResult mutating func read(until keyPath: KeyPath<Character, Bool>, throwOnOverflow: Bool = true) throws -> Substring {
    let origin = position
    while !reachedEnd(), !unsafeCurrent()[keyPath: keyPath] {
        unsafeAdvance()
    }
    // the loop stops either on a matching character or at the end of the buffer
    if reachedEnd(), throwOnOverflow {
        unsafeSetPosition(origin)
        throw HBParser.Error.overflow
    }
    return buffer[origin ..< position]
}
/// Read from buffer until the supplied closure returns true for a character. Position after this is
/// of the character the closure accepted, which is not included in the result.
/// - Parameter cb: Closure called with each character in turn; return true to stop reading
/// - Parameter throwOnOverflow: If true, hitting the end of the buffer without a match restores the
///   starting position and throws; if false, everything up to the end of the buffer is returned
/// - Throws: .overflow
/// - Returns: String read from buffer
@discardableResult mutating func read(until cb: (Character) -> Bool, throwOnOverflow: Bool = true) throws -> Substring {
    let startIndex = position
    while !reachedEnd() {
        // reachedEnd() has already been checked, so the unchecked accessor is safe here
        if cb(unsafeCurrent()) {
            return buffer[startIndex ..< position]
        }
        unsafeAdvance()
    }
    if throwOnOverflow {
        // a failed read must not consume anything
        unsafeSetPosition(startIndex)
        throw HBParser.Error.overflow
    }
    return buffer[startIndex ..< position]
}
/// Read from buffer from current position until the end of the buffer
/// - Returns: String read from buffer
@discardableResult mutating func readUntilTheEnd() -> HBParser {
let startIndex = index
index = range.endIndex
return subParser(startIndex ..< index)
@discardableResult mutating func readUntilTheEnd() -> Substring {
let startIndex = position
position = buffer.endIndex
return buffer[startIndex ..< position]
}
/// Read while character at current position is the one supplied
/// - Parameter while: Unicode.Scalar to check against
/// - Parameter while: Character to check against
/// - Returns: String read from buffer
@discardableResult mutating func read(while: Unicode.Scalar) -> Int {
@discardableResult mutating func read(while: Character) -> Int {
var count = 0
while !reachedEnd(),
unsafeCurrent() == `while`
@@ -276,68 +221,74 @@ extension HBParser {
return count
}
/// Read while character at current position is in supplied set
/// - Parameter while: character set to check
/// Read while keyPath on character at current position returns true is the one supplied
/// - Parameter while: keyPath to check
/// - Returns: String read from buffer
@discardableResult mutating func read(while characterSet: Set<Unicode.Scalar>) -> HBParser {
let startIndex = index
while !reachedEnd(),
characterSet.contains(unsafeCurrent())
{
unsafeAdvance()
}
return subParser(startIndex ..< index)
}
/// Read while character returns true for supplied closure
/// - Parameter while: character set to check
/// - Returns: String read from buffer
@discardableResult mutating func read(while: (Unicode.Scalar) -> Bool) -> HBParser {
let startIndex = index
while !reachedEnd(),
`while`(unsafeCurrent())
{
unsafeAdvance()
}
return subParser(startIndex ..< index)
}
/// Read while character returns true for supplied KeyPath
/// - Parameter while: character set to check
/// - Returns: String read from buffer
@discardableResult mutating func read(while keyPath: KeyPath<Unicode.Scalar, Bool>) -> HBParser {
let startIndex = index
@discardableResult mutating func read(while keyPath: KeyPath<Character, Bool>) -> Substring {
let startIndex = position
while !reachedEnd(),
unsafeCurrent()[keyPath: keyPath]
{
unsafeAdvance()
}
return subParser(startIndex ..< index)
return buffer[startIndex ..< position]
}
/// Split parser into sections separated by character
/// - Parameter separator: Separator character
/// - Returns: arrays of sub parsers
mutating func split(separator: Unicode.Scalar) -> [HBParser] {
var subParsers: [HBParser] = []
while !reachedEnd() {
do {
let section = try read(until: separator)
subParsers.append(section)
unsafeAdvance()
} catch {
if !reachedEnd() {
subParsers.append(readUntilTheEnd())
}
}
/// Read while character at current position is in supplied set
/// - Parameter while: character set to check
/// - Returns: String read from buffer
@discardableResult mutating func read(while characterSet: Set<Character>) -> Substring {
let startIndex = position
while !reachedEnd(),
characterSet.contains(unsafeCurrent())
{
unsafeAdvance()
}
return subParsers
return buffer[startIndex ..< position]
}
/// Return whether we have reached the end of the buffer
/// - Returns: Have we reached the end
func reachedEnd() -> Bool {
return index == range.endIndex
return position == buffer.endIndex
}
/// Return whether we are at the start of the buffer
/// - Returns: Are we at the start
func atStart() -> Bool {
return position == buffer.startIndex
}
}
extension HBParser {
    /// Description of a position in the parsed text: the line it is on and where on that line
    public struct Context {
        /// Text of the line containing the position, without its line terminator
        public let line: String
        /// Line number of the position, counting from 1
        public let lineNumber: Int
        /// Column of the position within its line, in characters, counting from 1
        public let columnNumber: Int
    }

    /// Return context of current position (line, lineNumber, columnNumber)
    ///
    /// Line boundaries are found with `Character.isNewline`, the same test that is used to count
    /// the lines. Comparing against "\n" alone misses "\r\n", which Swift treats as a single
    /// `Character`, and would make the reported line and column disagree with the line number.
    func getContext() -> Context {
        let text = buffer
        let textBefore = text[text.startIndex ..< position]
        // the line starts just after the last newline strictly before the position; a newline
        // sitting at the position itself terminates the current line rather than starting it
        let lineStart = textBefore.lastIndex(where: { $0.isNewline }).map { text.index(after: $0) } ?? text.startIndex
        // the line ends at the first newline at or after the position, or at the end of the text
        let lineEnd = text[position...].firstIndex(where: { $0.isNewline }) ?? text.endIndex
        let columnNumber = text.distance(from: lineStart, to: position)
        // count new lines up to this current position
        let lineNumber = textBefore.reduce(0) { $1.isNewline ? $0 + 1 : $0 }
        return Context(line: String(text[lineStart ..< lineEnd]), lineNumber: lineNumber + 1, columnNumber: columnNumber + 1)
    }
}
@@ -345,325 +296,71 @@ extension HBParser {
extension HBParser {
/// Return the character at the current position
/// - Throws: .overflow
/// - Returns: Unicode.Scalar
func current() -> Unicode.Scalar {
guard !reachedEnd() else { return Unicode.Scalar(0) }
/// - Returns: Character
func current() -> Character {
guard !reachedEnd() else { return "\0" }
return unsafeCurrent()
}
/// Move forward one character
/// - Throws: .overflow
mutating func advance() throws {
guard !reachedEnd() else { throw Error.overflow }
guard !reachedEnd() else { throw HBParser.Error.overflow }
return unsafeAdvance()
}
/// Step the parser back by a single character
/// - Throws: .overflow if the parser is already at the start of the buffer
mutating func retreat() throws {
    if atStart() {
        throw HBParser.Error.overflow
    }
    unsafeRetreat()
}
/// Move forward so many character
/// - Parameter amount: number of characters to move forward
/// - Throws: .overflow
mutating func advance(by amount: Int) throws {
var amount = amount
while amount > 0 {
guard !reachedEnd() else { throw Error.overflow }
index = skipUTF8Character(at: index)
amount -= 1
}
}
/// Move backwards one character
/// - Throws: .overflow
mutating func retreat() throws {
guard index > range.startIndex else { throw Error.overflow }
index = backOneUTF8Character(at: index)
guard buffer.distance(from: position, to: buffer.endIndex) >= amount else { throw HBParser.Error.overflow }
return unsafeAdvance(by: amount)
}
/// Move back so many characters
/// - Parameter amount: number of characters to move back
/// - Throws: .overflow
mutating func retreat(by amount: Int) throws {
var amount = amount
while amount > 0 {
guard index > range.startIndex else { throw Error.overflow }
index = backOneUTF8Character(at: index)
amount -= 1
}
guard buffer.distance(from: buffer.startIndex, to: position) >= amount else { throw HBParser.Error.overflow }
return unsafeRetreat(by: amount)
}
/// Move the parser to the supplied index of the buffer
/// - Parameter position: Index to move to
/// - Throws: .overflow if the index lies beyond the end of the buffer
mutating func setPosition(_ position: String.Index) throws {
    if position > buffer.endIndex {
        throw HBParser.Error.overflow
    }
    unsafeSetPosition(position)
}
}
// unsafe versions without checks
extension HBParser {
/// Return the character at the current position without checking whether the end of the buffer
/// has been reached. Callers in this file check `reachedEnd()` before calling it.
/// - Returns: Character at the current position
func unsafeCurrent() -> Character {
return buffer[position]
}
mutating func unsafeAdvance() {
index = skipUTF8Character(at: index)
position = buffer.index(after: position)
}
/// Move back one character without checking whether the parser is already at the start of the buffer
mutating func unsafeRetreat() {
position = buffer.index(before: position)
}
mutating func unsafeAdvance(by amount: Int) {
var amount = amount
while amount > 0 {
index = skipUTF8Character(at: index)
amount -= 1
}
position = buffer.index(position, offsetBy: amount)
}
func getPosition() -> Int {
return index
/// Move back so many characters without checking that enough characters precede the current position
/// - Parameter amount: number of characters to move back
mutating func unsafeRetreat(by amount: Int) {
position = buffer.index(position, offsetBy: -amount)
}
mutating func setPosition(_ index: Int) throws {
if index == self.range.endIndex {
_setPosition(index)
return
}
guard range.contains(index) else { throw Error.invalidPosition }
guard validateUTF8Character(at: index).0 != nil else { throw Error.invalidPosition }
_setPosition(index)
}
}
/// extend Parser to conform to Sequence
extension HBParser: Sequence {
typealias Element = Unicode.Scalar
__consuming func makeIterator() -> Iterator {
return Iterator(self)
}
struct Iterator: IteratorProtocol {
typealias Element = Unicode.Scalar
var parser: HBParser
init(_ parser: HBParser) {
self.parser = parser
}
mutating func next() -> Unicode.Scalar? {
guard !parser.reachedEnd() else { return nil }
return parser.unsafeCurrentAndAdvance()
}
}
}
// internal versions without checks
private extension HBParser {
func unsafeCurrent() -> Unicode.Scalar {
return decodeUTF8Character(at: index).0
}
mutating func unsafeCurrentAndAdvance() -> Unicode.Scalar {
let (unicodeScalar, index) = decodeUTF8Character(at: self.index)
self.index = index
return unicodeScalar
}
mutating func _setPosition(_ index: Int) {
self.index = index
}
func makeString<Bytes: Collection>(_ bytes: Bytes) -> String where Bytes.Element == UInt8, Bytes.Index == Int {
if let string = bytes.withContiguousStorageIfAvailable({ String(decoding: $0, as: Unicode.UTF8.self) }) {
return string
} else {
return String(decoding: bytes, as: Unicode.UTF8.self)
}
}
}
// UTF8 parsing
extension HBParser {
func decodeUTF8Character(at index: Int) -> (Unicode.Scalar, Int) {
var index = index
let byte1 = UInt32(buffer[index])
var value: UInt32
if byte1 & 0xC0 == 0xC0 {
index += 1
let byte2 = UInt32(buffer[index] & 0x3F)
if byte1 & 0xE0 == 0xE0 {
index += 1
let byte3 = UInt32(buffer[index] & 0x3F)
if byte1 & 0xF0 == 0xF0 {
index += 1
let byte4 = UInt32(buffer[index] & 0x3F)
value = (byte1 & 0x7) << 18 + byte2 << 12 + byte3 << 6 + byte4
} else {
value = (byte1 & 0xF) << 12 + byte2 << 6 + byte3
}
} else {
value = (byte1 & 0x1F) << 6 + byte2
}
} else {
value = byte1 & 0x7F
}
let unicodeScalar = Unicode.Scalar(value)!
return (unicodeScalar, index + 1)
}
func skipUTF8Character(at index: Int) -> Int {
if buffer[index] & 0x80 != 0x80 { return index + 1 }
if buffer[index + 1] & 0xC0 == 0x80 { return index + 2 }
if buffer[index + 2] & 0xC0 == 0x80 { return index + 3 }
return index + 4
}
func backOneUTF8Character(at index: Int) -> Int {
if buffer[index - 1] & 0xC0 != 0x80 { return index - 1 }
if buffer[index - 2] & 0xC0 != 0x80 { return index - 2 }
if buffer[index - 3] & 0xC0 != 0x80 { return index - 3 }
return index - 4
}
/// same as `decodeUTF8Character` but adds extra validation, so we can make assumptions later on in decode and skip
func validateUTF8Character(at index: Int) -> (Unicode.Scalar?, Int) {
var index = index
let byte1 = UInt32(buffer[index])
var value: UInt32
if byte1 & 0xC0 == 0xC0 {
index += 1
let byte = UInt32(buffer[index])
guard byte & 0xC0 == 0x80 else { return (nil, index) }
let byte2 = UInt32(byte & 0x3F)
if byte1 & 0xE0 == 0xE0 {
index += 1
let byte = UInt32(buffer[index])
guard byte & 0xC0 == 0x80 else { return (nil, index) }
let byte3 = UInt32(byte & 0x3F)
if byte1 & 0xF0 == 0xF0 {
index += 1
let byte = UInt32(buffer[index])
guard byte & 0xC0 == 0x80 else { return (nil, index) }
let byte4 = UInt32(byte & 0x3F)
value = (byte1 & 0x7) << 18 + byte2 << 12 + byte3 << 6 + byte4
} else {
value = (byte1 & 0xF) << 12 + byte2 << 6 + byte3
}
} else {
value = (byte1 & 0x1F) << 6 + byte2
}
} else {
value = byte1 & 0x7F
}
let unicodeScalar = Unicode.Scalar(value)
return (unicodeScalar, index + 1)
}
/// return if the buffer is valid UTF8
func validateUTF8() -> Bool {
var index = range.startIndex
while index < range.endIndex {
let (scalar, newIndex) = validateUTF8Character(at: index)
guard scalar != nil else { return false }
index = newIndex
}
return true
}
private static let asciiHexValues: [UInt8] = [
/* 00 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 08 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 10 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 18 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 20 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 28 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 30 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
/* 38 */ 0x08, 0x09, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 40 */ 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80,
/* 48 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 50 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 58 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 60 */ 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80,
/* 68 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 70 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 78 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 80 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 88 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 90 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* 98 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* A0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* A8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* B0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* B8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* C0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* C8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* D0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* D8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* E0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* E8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* F0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
/* F8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
]
/// percent decode UTF8
func percentDecode() -> String? {
struct DecodeError: Swift.Error {}
func _percentDecode(_ original: ArraySlice<UInt8>, _ bytes: UnsafeMutableBufferPointer<UInt8>) throws -> Int {
var newIndex = 0
var index = original.startIndex
while index < original.endIndex {
// if we have found a percent sign
if original[index] == 0x25 {
let high = Self.asciiHexValues[Int(original[index + 1])]
let low = Self.asciiHexValues[Int(original[index + 2])]
index += 3
if ((high | low) & 0x80) != 0 {
throw DecodeError()
}
bytes[newIndex] = (high << 4) | low
newIndex += 1
} else {
bytes[newIndex] = original[index]
newIndex += 1
index += 1
}
}
return newIndex
}
guard index != range.endIndex else { return "" }
do {
if #available(macOS 11, *) {
return try String(unsafeUninitializedCapacity: range.endIndex - index) { bytes -> Int in
try _percentDecode(self.buffer[self.index ..< range.endIndex], bytes)
}
} else {
let newBuffer = try [UInt8].init(unsafeUninitializedCapacity: range.endIndex - index) { bytes, count in
try count = _percentDecode(self.buffer[self.index ..< range.endIndex], bytes)
}
return makeString(newBuffer)
}
} catch {
return nil
}
}
}
extension Unicode.Scalar {
var isWhitespace: Bool {
return properties.isWhitespace
}
var isNewline: Bool {
switch value {
case 0x000A ... 0x000D /* LF ... CR */: return true
case 0x0085 /* NEXT LINE (NEL) */: return true
case 0x2028 /* LINE SEPARATOR */: return true
case 0x2029 /* PARAGRAPH SEPARATOR */: return true
default: return false
}
}
var isNumber: Bool {
return properties.numericType != nil
}
var isLetter: Bool {
return properties.isAlphabetic
}
var isLetterOrNumber: Bool {
return isLetter || isNumber
}
}
extension Set where Element == Unicode.Scalar {
init(_ string: String) {
self = Set(string.unicodeScalars)
/// Set the current position without checking that it lies inside the buffer
/// - Parameter position: Index to move to
mutating func unsafeSetPosition(_ position: String.Index) {
self.position = position
}
}

View File

@@ -1,6 +1,11 @@
extension HBMustacheTemplate {
enum Error: Swift.Error {
/// Error thrown by `parse(_:)` when parsing fails, pairing the underlying error with the parser context
public struct ParserError: Swift.Error {
/// Context (line, line number and column number) reported by the parser when the error was caught
public let context: HBParser.Context
/// Underlying error that was caught
public let error: Swift.Error
}
public enum Error: Swift.Error {
case sectionCloseNameIncorrect
case unfinishedName
case expectedSectionEnd
@@ -46,23 +51,27 @@ extension HBMustacheTemplate {
/// parse mustache text to generate a list of tokens
static func parse(_ string: String) throws -> [Token] {
var parser = HBParser(string)
return try parse(&parser, state: .init())
do {
return try parse(&parser, state: .init())
} catch {
throw ParserError(context: parser.getContext(), error: error)
}
}
/// parse section in mustache text
static func parse(_ parser: inout HBParser, state: ParserState) throws -> [Token] {
var tokens: [Token] = []
var state = state
var whiteSpaceBefore: String = ""
var whiteSpaceBefore: Substring = ""
while !parser.reachedEnd() {
// if new line read whitespace
if state.newLine {
whiteSpaceBefore = parser.read(while: Set(" \t")).string
whiteSpaceBefore = parser.read(while: Set(" \t"))
}
let text = try readUntilDelimiterOrNewline(&parser, state: state)
// if we hit a newline add text
if parser.current() == "\n" {
tokens.append(.text(whiteSpaceBefore + text + "\n"))
if parser.current().isNewline {
tokens.append(.text(whiteSpaceBefore + text + String(parser.current())))
state.newLine = true
parser.unsafeAdvance()
continue
@@ -87,7 +96,7 @@ extension HBMustacheTemplate {
if isStandalone(&parser, state: state) {
setNewLine = true
} else if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
let sectionTokens = try parse(&parser, state: state.withSectionName(name, method: method))
@@ -100,7 +109,7 @@ extension HBMustacheTemplate {
if isStandalone(&parser, state: state) {
setNewLine = true
} else if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
let sectionTokens = try parse(&parser, state: state.withSectionName(name, method: method))
@@ -109,14 +118,16 @@ extension HBMustacheTemplate {
case "/":
// end of section
parser.unsafeAdvance()
let position = parser.position
let (name, method) = try parseName(&parser, state: state)
guard name == state.sectionName, method == state.sectionMethod else {
parser.unsafeSetPosition(position)
throw Error.sectionCloseNameIncorrect
}
if isStandalone(&parser, state: state) {
setNewLine = true
} else if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
return tokens
@@ -130,7 +141,7 @@ extension HBMustacheTemplate {
case "{":
// unescaped variable
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
parser.unsafeAdvance()
@@ -141,7 +152,7 @@ extension HBMustacheTemplate {
case "&":
// unescaped variable
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
parser.unsafeAdvance()
@@ -153,11 +164,11 @@ extension HBMustacheTemplate {
parser.unsafeAdvance()
let (name, _) = try parseName(&parser, state: state)
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
}
if isStandalone(&parser, state: state) {
setNewLine = true
tokens.append(.partial(name, indentation: whiteSpaceBefore))
tokens.append(.partial(name, indentation: String(whiteSpaceBefore)))
} else {
tokens.append(.partial(name, indentation: nil))
}
@@ -172,7 +183,7 @@ extension HBMustacheTemplate {
default:
// variable
if whiteSpaceBefore.count > 0 {
tokens.append(.text(whiteSpaceBefore))
tokens.append(.text(String(whiteSpaceBefore)))
whiteSpaceBefore = ""
}
let (name, method) = try parseName(&parser, state: state)
@@ -189,24 +200,23 @@ extension HBMustacheTemplate {
/// read until we hit either the start delimiter of a tag or a newline
static func readUntilDelimiterOrNewline(_ parser: inout HBParser, state: ParserState) throws -> String {
var untilSet = Set("\n")
guard let delimiterFirstChar = state.startDelimiter.first,
let delimiterFirstScalar = delimiterFirstChar.unicodeScalars.first else { return "" }
var untilSet: Set<Character> = ["\n", "\r\n"]
guard let delimiterFirstChar = state.startDelimiter.first else { return "" }
var totalText = ""
untilSet.insert(delimiterFirstScalar)
untilSet.insert(delimiterFirstChar)
while !parser.reachedEnd() {
// read until we hit either a newline or "{"
let text = try parser.read(until: untilSet, throwOnOverflow: false).string
let text = try parser.read(until: untilSet, throwOnOverflow: false)
totalText += text
// if new line append all text read plus newline
if parser.current() == "\n" {
if parser.current().isNewline {
break
} else if parser.current() == delimiterFirstScalar {
if try parser.read(state.startDelimiter) {
} else if parser.current() == delimiterFirstChar {
if try parser.read(string: state.startDelimiter) {
break
}
totalText += String(delimiterFirstScalar)
totalText += String(delimiterFirstChar)
parser.unsafeAdvance()
}
}
@@ -216,59 +226,67 @@ extension HBMustacheTemplate {
/// parse variable name
///
/// Skips leading whitespace, reads the run of `sectionNameChars`, skips trailing whitespace and
/// then requires `state.endDelimiter`. The text read is then re-scanned: if it consists only of
/// `sectionNameCharsWithoutBrackets` it is a plain name, otherwise it must have the exact form
/// `method(name)`.
/// - Returns: `(name, nil)` for a plain name, or `(name, method)` for `method(name)`
/// - Throws: `Error.unfinishedName` when the end delimiter does not follow the name or the
///   bracketed form is malformed; errors thrown by the parser's `read` calls are propagated
// NOTE(review): this span reads like a rendered diff with its +/- markers stripped. The lines that
// use `text.…` / `.string` / `read(state.endDelimiter)` look like superseded versions of the
// `nameParser` / `String(…)` / `read(string:)` lines next to them - confirm which set is live.
static func parseName(_ parser: inout HBParser, state: ParserState) throws -> (String, String?) {
parser.read(while: \.isWhitespace)
var text = parser.read(while: sectionNameChars)
let text = String(parser.read(while: sectionNameChars))
parser.read(while: \.isWhitespace)
guard try parser.read(state.endDelimiter) else { throw Error.unfinishedName }
guard try parser.read(string: state.endDelimiter) else { throw Error.unfinishedName }
// does the name include brackets. If so this is a method call
let string = text.read(while: sectionNameCharsWithoutBrackets)
if text.reachedEnd() {
return (text.string, nil)
// second parser runs over just the name text so brackets can be located within it
var nameParser = HBParser(String(text))
let string = nameParser.read(while: sectionNameCharsWithoutBrackets)
if nameParser.reachedEnd() {
// no brackets found: the whole text is the variable name
return (text, nil)
} else {
// parse function parameter, as we have just parsed a function name
guard text.current() == "(" else { throw Error.unfinishedName }
text.unsafeAdvance()
let string2 = text.read(while: sectionNameCharsWithoutBrackets)
guard text.current() == ")" else { throw Error.unfinishedName }
text.unsafeAdvance()
guard text.reachedEnd() else { throw Error.unfinishedName }
return (string2.string, string.string)
guard nameParser.current() == "(" else { throw Error.unfinishedName }
nameParser.unsafeAdvance()
let string2 = nameParser.read(while: sectionNameCharsWithoutBrackets)
guard nameParser.current() == ")" else { throw Error.unfinishedName }
nameParser.unsafeAdvance()
// nothing may follow the closing bracket
guard nameParser.reachedEnd() else { throw Error.unfinishedName }
// string2 is the text inside the brackets (the name), string is the method before them
return (String(string2), String(string))
}
}
/// Read the text of a comment, up to `state.endDelimiter`, and return it as a `String`.
///
/// The read is made with `throwOnOverflow: true` and `skipToEnd: true`.
/// - Throws: whatever `parser.read(untilString:throwOnOverflow:skipToEnd:)` throws
// NOTE(review): presumably `skipToEnd` leaves the parser just past the end delimiter - confirm
// against HBParser. This span looks like a rendered diff: `return text.string` appears to be the
// superseded form of `return String(text)`.
static func parseComment(_ parser: inout HBParser, state: ParserState) throws -> String {
let text = try parser.read(untilString: state.endDelimiter, throwOnOverflow: true, skipToEnd: true)
return text.string
return String(text)
}
/// Parse the body of a set-delimiter tag and return a parser state that uses the new delimiters.
///
/// Reads, separated by optional whitespace: the new start delimiter (up to whitespace), the new
/// end delimiter (up to `=` or whitespace), a `=` and finally the current `state.endDelimiter`.
/// Both new delimiters must be non-empty.
/// - Returns: `state.withDelimiters(start:end:)` built from the two delimiters read
/// - Throws: `Error.invalidSetDelimiter` when any of the above is missing
// NOTE(review): this span reads like a rendered diff with its +/- markers stripped. The
// `.string`-based reads directly below look like superseded versions of the `Substring` reads in
// the `do` block, as do the `read(state.endDelimiter)` guard and the first `return`.
static func parserSetDelimiter(_ parser: inout HBParser, state: ParserState) throws -> ParserState {
parser.read(while: \.isWhitespace)
let startDelimiter = try parser.read(until: \.isWhitespace).string
parser.read(while: \.isWhitespace)
let endDelimiter = try parser.read(until: { $0 == "=" || $0.isWhitespace }).string
parser.read(while: \.isWhitespace)
let startDelimiter: Substring
let endDelimiter: Substring
do {
parser.read(while: \.isWhitespace)
startDelimiter = try parser.read(until: \.isWhitespace)
parser.read(while: \.isWhitespace)
endDelimiter = try parser.read(until: { $0 == "=" || $0.isWhitespace })
parser.read(while: \.isWhitespace)
} catch {
// any error thrown while reading the delimiters is reported as an invalid set delimiter
throw Error.invalidSetDelimiter
}
guard try parser.read("=") else { throw Error.invalidSetDelimiter }
guard try parser.read(state.endDelimiter) else { throw Error.invalidSetDelimiter }
guard try parser.read(string: state.endDelimiter) else { throw Error.invalidSetDelimiter }
guard startDelimiter.count > 0, endDelimiter.count > 0 else { throw Error.invalidSetDelimiter }
return state.withDelimiters(start: startDelimiter, end: endDelimiter)
return state.withDelimiters(start: String(startDelimiter), end: String(endDelimiter))
}
/// Test whether only whitespace remains before the end of the current line.
///
/// The scan runs on a copy of the parser. When a newline is found the newline is consumed and
/// `parser` is moved to the copy's position; otherwise `parser` is left where it was.
/// - Returns: `true` if `parser` has already reached the end or the rest of the line is blank
///   and ends in a newline, `false` otherwise
// NOTE(review): this span reads like a rendered diff with its +/- markers stripped. The
// `Set(" \t\r")` / `== "\n"` / `getPosition()` lines look like superseded versions of the
// `Set(" \t")` / `.isNewline` / `.position` lines that follow them.
static func hasLineFinished(_ parser: inout HBParser) -> Bool {
// work on a copy so nothing is consumed unless the line really has finished
var parser2 = parser
if parser.reachedEnd() { return true }
parser2.read(while: Set(" \t\r"))
if parser2.current() == "\n" {
parser2.read(while: Set(" \t"))
if parser2.current().isNewline {
parser2.unsafeAdvance()
// NOTE(review): `try!` presumably relies on a position taken from a copy of the same
// parser always being valid - confirm against HBParser.setPosition
try! parser.setPosition(parser2.getPosition())
try! parser.setPosition(parser2.position)
return true
}
return false
}
/// Test whether the tag just parsed is on a line of its own.
///
/// True when `state.newLine` is set and `hasLineFinished` reports that the rest of the line is
/// blank. `hasLineFinished` is only called when `state.newLine` is true, and it may advance
/// `parser` past the trailing newline.
// NOTE(review): the signature appears twice because this span looks like a rendered diff of a
// whitespace-only change; only one copy should exist in the real file.
static func isStandalone(_ parser: inout HBParser, state: ParserState) -> Bool {
static func isStandalone(_ parser: inout HBParser, state: ParserState) -> Bool {
return state.newLine && hasLineFinished(&parser)
}
// Character sets used by `parseName`: digits, ASCII letters, ".", "_" and "?", with the second
// set additionally allowing "(" and ")" so a `method(name)` form can be read in one pass.
// NOTE(review): the `Set<Unicode.Scalar>` declarations look like the superseded versions of the
// `Set<Character>` declarations below them (rendered diff) - only one pair can exist in the file.
private static let sectionNameCharsWithoutBrackets = Set<Unicode.Scalar>("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?")
private static let sectionNameChars = Set<Unicode.Scalar>("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?()")
private static let sectionNameCharsWithoutBrackets = Set<Character>("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?")
private static let sectionNameChars = Set<Character>("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?()")
}

View File

@@ -0,0 +1,79 @@
import HummingbirdMustache
import XCTest
/// Tests that template parse failures are reported as `HBMustacheTemplate.ParserError`
/// carrying the underlying error plus the line, line number and column where it occurred
final class ErrorTests: XCTestCase {
    /// Closing a section with a different name than it was opened with
    func testSectionCloseNameIncorrect() {
        let source = """
        {{#test}}
        {{.}}
        {{/test2}}
        """
        XCTAssertThrowsError(try HBMustacheTemplate(string: source)) { thrown in
            guard let parserError = thrown as? HBMustacheTemplate.ParserError else {
                XCTFail("\(thrown)")
                return
            }
            XCTAssertEqual(parserError.error as? HBMustacheTemplate.Error, .sectionCloseNameIncorrect)
            XCTAssertEqual(parserError.context.line, "{{/test2}}")
            XCTAssertEqual(parserError.context.lineNumber, 3)
            XCTAssertEqual(parserError.context.columnNumber, 4)
        }
    }

    /// A variable tag that is missing part of its end delimiter
    func testUnfinishedName() {
        let source = """
        {{#test}}
        {{name}
        {{/test2}}
        """
        XCTAssertThrowsError(try HBMustacheTemplate(string: source)) { thrown in
            guard let parserError = thrown as? HBMustacheTemplate.ParserError else {
                XCTFail("\(thrown)")
                return
            }
            XCTAssertEqual(parserError.error as? HBMustacheTemplate.Error, .unfinishedName)
            XCTAssertEqual(parserError.context.line, "{{name}")
            XCTAssertEqual(parserError.context.lineNumber, 2)
            XCTAssertEqual(parserError.context.columnNumber, 7)
        }
    }

    /// A section that is opened but never closed
    func testExpectedSectionEnd() {
        let source = """
        {{#test}}
        {{.}}
        """
        XCTAssertThrowsError(try HBMustacheTemplate(string: source)) { thrown in
            guard let parserError = thrown as? HBMustacheTemplate.ParserError else {
                XCTFail("\(thrown)")
                return
            }
            XCTAssertEqual(parserError.error as? HBMustacheTemplate.Error, .expectedSectionEnd)
            XCTAssertEqual(parserError.context.line, "{{.}}")
            XCTAssertEqual(parserError.context.lineNumber, 2)
            XCTAssertEqual(parserError.context.columnNumber, 6)
        }
    }

    /// A set-delimiter tag that cannot be parsed after the delimiters have already been changed
    func testInvalidSetDelimiter() {
        let source = """
        {{=<% %>=}}
        <%.%>
        <%={{}}=%>
        """
        XCTAssertThrowsError(try HBMustacheTemplate(string: source)) { thrown in
            guard let parserError = thrown as? HBMustacheTemplate.ParserError else {
                XCTFail("\(thrown)")
                return
            }
            XCTAssertEqual(parserError.error as? HBMustacheTemplate.Error, .invalidSetDelimiter)
            XCTAssertEqual(parserError.context.line, "<%={{}}=%>")
            XCTAssertEqual(parserError.context.lineNumber, 3)
            XCTAssertEqual(parserError.context.columnNumber, 4)
        }
    }
}

View File

@@ -5,14 +5,40 @@ final class LibraryTests: XCTestCase {
/// Writes a single template into a `templates` directory, loads the directory as a library and
/// checks that rendering the template named "test" with an array value produces one `<value>`
/// element per array entry.
// NOTE(review): this span reads like a rendered diff with its +/- markers stripped. The
// `let mustache = "<test>…"` / `let data` / `data.write` lines look like superseded versions of
// the `Data(…)`-based lines after them (note `mustache` is declared twice) - confirm which set
// is live.
func testDirectoryLoad() throws {
let fs = FileManager()
// creation errors are deliberately ignored (`try?`)
try? fs.createDirectory(atPath: "templates", withIntermediateDirectories: false)
let mustache = "<test>{{#value}}<value>{{.}}</value>{{/value}}</test>"
let data = Data(mustache.utf8)
defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates")) }
try data.write(to: URL(fileURLWithPath: "templates/test.mustache"))
let mustache = Data("<test>{{#value}}<value>{{.}}</value>{{/value}}</test>".utf8)
try mustache.write(to: URL(fileURLWithPath: "templates/test.mustache"))
// defers run in reverse order, so the file is removed before the directory
defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates/test.mustache")) }
let library = try HBMustacheLibrary(directory: "./templates")
let object = ["value": ["value1", "value2"]]
XCTAssertEqual(library.render(object, withTemplate: "test"), "<test><value>value1</value><value>value2</value></test>")
}
/// Loading a directory that contains a template which fails to parse throws a
/// `HBMustacheLibrary.ParserError` naming the file and the location of the failure
func testLibraryParserError() throws {
    let fileManager = FileManager()
    try? fileManager.createDirectory(atPath: "templates", withIntermediateDirectories: false)
    // defers run in reverse order, so the files below are removed before the directory
    defer { XCTAssertNoThrow(try fileManager.removeItem(atPath: "templates")) }

    // a template that parses cleanly
    let validTemplate = Data("<test>{{#value}}<value>{{.}}</value>{{/value}}</test>".utf8)
    try validTemplate.write(to: URL(fileURLWithPath: "templates/test.mustache"))
    defer { XCTAssertNoThrow(try fileManager.removeItem(atPath: "templates/test.mustache")) }

    // a template whose second line cannot be parsed
    let brokenSource = """
    {{#test}}
    {{{name}}
    {{/test2}}
    """
    let brokenTemplate = Data(brokenSource.utf8)
    try brokenTemplate.write(to: URL(fileURLWithPath: "templates/error.mustache"))
    defer { XCTAssertNoThrow(try fileManager.removeItem(atPath: "templates/error.mustache")) }

    XCTAssertThrowsError(try HBMustacheLibrary(directory: "./templates")) { thrown in
        switch thrown {
        case let parserError as HBMustacheLibrary.ParserError:
            XCTAssertEqual(parserError.filename, "error.mustache")
            XCTAssertEqual(parserError.context.line, "{{{name}}")
            XCTAssertEqual(parserError.context.lineNumber, 2)
            XCTAssertEqual(parserError.context.columnNumber, 10)
        default:
            XCTFail("\(thrown)")
        }
    }
}
}

View File

@@ -128,6 +128,12 @@ final class MethodTests: XCTestCase {
""")
}
/// Rendering an array with an inverted `last()` section puts ", " after every element
/// except the final one
func testListOutput() throws {
    let template = try HBMustacheTemplate(string: "{{#.}}{{.}}{{^last()}}, {{/last()}}{{/.}}")
    let numbers: [Int] = [1, 2, 3, 4]
    let rendered = template.render(numbers)
    XCTAssertEqual(rendered, "1, 2, 3, 4")
}
func testDictionaryEnumerated() throws {
let template = try HBMustacheTemplate(string: """
{{#enumerated(.)}}<b>{{ key }} = {{ value }}</b>{{/enumerated(.)}}

View File

@@ -31,39 +31,6 @@ final class TemplateParserTests: XCTestCase {
let template = try HBMustacheTemplate(string: "{{ section }}")
XCTAssertEqual(template.tokens, [.variable(name: "section")])
}
/// A section that is never closed fails to parse with `expectedSectionEnd`.
///
/// The template initializer reports parse failures as `HBMustacheTemplate.ParserError`, which
/// wraps the underlying error, so the error is unwrapped before matching. A bare
/// `HBMustacheTemplate.Error` is still accepted.
func testSectionEndError() throws {
    XCTAssertThrowsError(_ = try HBMustacheTemplate(string: "test {{#section}}")) { error in
        // look through the ParserError wrapper when there is one
        let underlying: Swift.Error
        if let parserError = error as? HBMustacheTemplate.ParserError {
            underlying = parserError.error
        } else {
            underlying = error
        }
        switch underlying {
        case HBMustacheTemplate.Error.expectedSectionEnd:
            break
        default:
            XCTFail("\(error)")
        }
    }
}
/// Closing a section with a different name fails to parse with `sectionCloseNameIncorrect`.
///
/// The template initializer reports parse failures as `HBMustacheTemplate.ParserError`, which
/// wraps the underlying error, so the error is unwrapped before matching. A bare
/// `HBMustacheTemplate.Error` is still accepted.
func testSectionCloseNameIncorrectError() throws {
    XCTAssertThrowsError(_ = try HBMustacheTemplate(string: "test {{#section}}{{/error}}")) { error in
        // look through the ParserError wrapper when there is one
        let underlying: Swift.Error
        if let parserError = error as? HBMustacheTemplate.ParserError {
            underlying = parserError.error
        } else {
            underlying = error
        }
        switch underlying {
        case HBMustacheTemplate.Error.sectionCloseNameIncorrect:
            break
        default:
            XCTFail("\(error)")
        }
    }
}
/// A tag name containing a character that is not allowed fails to parse with `unfinishedName`.
///
/// The template initializer reports parse failures as `HBMustacheTemplate.ParserError`, which
/// wraps the underlying error, so the error is unwrapped before matching. A bare
/// `HBMustacheTemplate.Error` is still accepted.
func testUnmatchedNameError() throws {
    XCTAssertThrowsError(_ = try HBMustacheTemplate(string: "test {{section#}}")) { error in
        // look through the ParserError wrapper when there is one
        let underlying: Swift.Error
        if let parserError = error as? HBMustacheTemplate.ParserError {
            underlying = parserError.error
        } else {
            underlying = error
        }
        switch underlying {
        case HBMustacheTemplate.Error.unfinishedName:
            break
        default:
            XCTFail("\(error)")
        }
    }
}
}
extension HBMustacheTemplate: Equatable {