From 05740bd7bc5d7feaf8bbc6af5e07310fef47649d Mon Sep 17 00:00:00 2001 From: Adam Fowler Date: Thu, 18 Mar 2021 17:26:42 +0000 Subject: [PATCH] Replace raw UTF8 parser with String parser (#6) * Replace UTF8 parser with String based one * swift format * Add parsing errors that include context about where error is * Remove old error tests --- .../Library+FileSystem.swift | 4 +- Sources/HummingbirdMustache/Library.swift | 10 +- Sources/HummingbirdMustache/Parser.swift | 763 ++++++------------ .../HummingbirdMustache/Template+Parser.swift | 116 +-- .../HummingbirdMustacheTests/ErrorTests.swift | 79 ++ .../LibraryTests.swift | 32 +- .../MethodTests.swift | 6 + .../TemplateParserTests.swift | 33 - 8 files changed, 421 insertions(+), 622 deletions(-) create mode 100644 Tests/HummingbirdMustacheTests/ErrorTests.swift diff --git a/Sources/HummingbirdMustache/Library+FileSystem.swift b/Sources/HummingbirdMustache/Library+FileSystem.swift index 643d191..1c8c6de 100644 --- a/Sources/HummingbirdMustache/Library+FileSystem.swift +++ b/Sources/HummingbirdMustache/Library+FileSystem.swift @@ -17,8 +17,8 @@ extension HBMustacheLibrary { let template: HBMustacheTemplate do { template = try HBMustacheTemplate(string: string) - } catch { - throw Error.failedToLoad(path, error) + } catch let error as HBMustacheTemplate.ParserError { + throw ParserError(filename: path, context: error.context, error: error.error) } // drop ".mustache" from path to get name let name = String(path.dropLast(extWithDot.count)) diff --git a/Sources/HummingbirdMustache/Library.swift b/Sources/HummingbirdMustache/Library.swift index 23d1d1f..3bdbdd7 100644 --- a/Sources/HummingbirdMustache/Library.swift +++ b/Sources/HummingbirdMustache/Library.swift @@ -47,8 +47,14 @@ public final class HBMustacheLibrary { return template.render(object) } - public enum Error: Swift.Error { - case failedToLoad(String, Swift.Error) + /// Error returned by init() when parser fails + public struct ParserError: Swift.Error { + /// File error occurred in + public let filename: String + /// Context (line, linenumber and column number) + public let context: HBParser.Context + /// Actual error that occurred + public let error: Error } private var templates: [String: HBMustacheTemplate] diff --git a/Sources/HummingbirdMustache/Parser.swift b/Sources/HummingbirdMustache/Parser.swift index d5495d0..0c92180 100644 --- a/Sources/HummingbirdMustache/Parser.swift +++ b/Sources/HummingbirdMustache/Parser.swift @@ -1,100 +1,66 @@ -// Parser.swift -// -// Half inspired by Reader class from John Sundell's Ink project -// https://github.com/JohnSundell/Ink/blob/master/Sources/Ink/Internal/Reader.swift -// with optimisation working ie removing String and doing my own UTF8 processing inspired by Fabian Fett's work in -// https://github.com/swift-extras/swift-extras-json/blob/main/Sources/ExtrasJSON/Parsing/DocumentReader.swift -// -// This is a copy of the parser from Hummingbird. I am not using the version in Hummingbird to avoid the dependency import Foundation /// Reader object for parsing String buffers -struct HBParser { +public struct HBParser { enum Error: Swift.Error { case overflow - case unexpected - case emptyString - case invalidUTF8 - case invalidPosition } - /// Create a Parser object - /// - Parameter string: UTF8 data to parse - init?(_ utf8Data: Bytes, validateUTF8: Bool = true) where Bytes.Element == UInt8 { - if let buffer = utf8Data as? [UInt8] { + /// internal storage used to store String + private class Storage { + init(_ buffer: String) { self.buffer = buffer - } else { - buffer = Array(utf8Data) } - index = 0 - range = 0 ..< buffer.endIndex - // should check that the data is valid utf8 - if validateUTF8 == true, self.validateUTF8() == false { - return nil - } + let buffer: String } + private let _storage: Storage + + /// Create a Reader object + /// - Parameter string: String to parse init(_ string: String) { - buffer = Array(string.utf8) - index = 0 - range = 0 ..< buffer.endIndex + _storage = Storage(string) + position = string.startIndex } - /// Return contents of parser as a string - var count: Int { - return range.count - } - - /// Return contents of parser as a string - var string: String { - return makeString(buffer[range]) - } - - private var buffer: [UInt8] - private var index: Int - private let range: Range -} - -// MARK: sub-parsers - -extension HBParser { - /// initialise a parser that parses a section of the buffer attached to another parser - init(_ parser: HBParser, range: Range) { - buffer = parser.buffer - index = range.startIndex - self.range = range - - precondition(range.startIndex >= 0 && range.endIndex <= buffer.endIndex) - precondition(buffer[range.startIndex] & 0xC0 != 0x80) // check we arent in the middle of a UTF8 character - } - - /// initialise a parser that parses a section of the buffer attached to this parser - func subParser(_ range: Range) -> HBParser { - if range.startIndex == range.endIndex { - return HBParser(self, range: self.range.startIndex ..< self.range.startIndex) - } - return HBParser(self, range: range) - } + var buffer: String { return _storage.buffer } + private(set) var position: String.Index } extension HBParser { /// Return current character /// - Throws: .overflow /// - Returns: Current character - mutating func character() throws -> Unicode.Scalar { - guard !reachedEnd() else { throw Error.overflow } - return unsafeCurrentAndAdvance() + mutating func character() throws -> Character { + guard !reachedEnd() else { throw HBParser.Error.overflow } + let c = unsafeCurrent() + unsafeAdvance() + return c } /// Read the current character and return if it is as intended. If character test returns true then move forward 1 /// - Parameter char: character to compare against /// - Throws: .overflow /// - Returns: If current character was the one we expected - mutating func read(_ char: Unicode.Scalar) throws -> Bool { - let initialIndex = index + mutating func read(_ char: Character) throws -> Bool { let c = try character() - guard c == char else { index = initialIndex; return false } + guard c == char else { unsafeRetreat(); return false } + return true + } + + /// Read the current character and return if it is as intended. If character test returns true then move forward 1 + /// - Parameter char: character to compare against + /// - Throws: .overflow + /// - Returns: If current character was the one we expected + mutating func read(string: String) throws -> Bool { + let initialPosition = position + guard string.count > 0 else { return true } + let subString = try read(count: string.count) + guard subString == string else { + position = initialPosition + return false + } return true } @@ -102,22 +68,9 @@ extension HBParser { /// - Parameter characterSet: Set of characters to compare against /// - Throws: .overflow /// - Returns: If current character is in character set - mutating func read(_ characterSet: Set) throws -> Bool { - let initialIndex = index + mutating func read(_ characterSet: Set) throws -> Bool { let c = try character() - guard characterSet.contains(c) else { index = initialIndex; return false } - return true - } - - /// Compare characters at current position against provided string. If the characters are the same as string provided advance past string - /// - Parameter string: String to compare against - /// - Throws: .overflow, .emptyString - /// - Returns: If characters at current position equal string - mutating func read(_ string: String) throws -> Bool { - let initialIndex = index - guard string.count > 0 else { throw Error.emptyString } - let subString = try read(count: string.count) - guard subString.string == string else { index = initialIndex; return false } + guard characterSet.contains(c) else { unsafeRetreat(); return false } return true } @@ -125,93 +78,31 @@ extension HBParser { /// - Parameter count: Number of characters to read /// - Throws: .overflow /// - Returns: The string read from the buffer - mutating func read(count: Int) throws -> HBParser { - var count = count - var readEndIndex = index - while count > 0 { - guard readEndIndex != range.endIndex else { throw Error.overflow } - readEndIndex = skipUTF8Character(at: readEndIndex) - count -= 1 - } - let result = subParser(index ..< readEndIndex) - index = readEndIndex - return result + mutating func read(count: Int) throws -> Substring { + guard buffer.distance(from: position, to: buffer.endIndex) >= count else { throw HBParser.Error.overflow } + let end = buffer.index(position, offsetBy: count) + let subString = buffer[position ..< end] + unsafeAdvance(by: count) + return subString } /// Read from buffer until we hit a character. Position after this is of the character we were checking for - /// - Parameter until: Unicode.Scalar to read until + /// - Parameter until: Character to read until /// - Throws: .overflow if we hit the end of the buffer before reading character /// - Returns: String read from buffer - @discardableResult mutating func read(until: Unicode.Scalar, throwOnOverflow: Bool = true) throws -> HBParser { - let startIndex = index + @discardableResult mutating func read(until: Character, throwOnOverflow: Bool = true) throws -> Substring { + let startIndex = position while !reachedEnd() { if unsafeCurrent() == until { - return subParser(startIndex ..< index) + return buffer[startIndex ..< position] } unsafeAdvance() } if throwOnOverflow { - _setPosition(startIndex) - throw Error.overflow + unsafeSetPosition(startIndex) + throw HBParser.Error.overflow } - return subParser(startIndex ..< index) - } - - /// Read from buffer until we hit a character in supplied set. Position after this is of the character we were checking for - /// - Parameter characterSet: Unicode.Scalar set to check against - /// - Throws: .overflow - /// - Returns: String read from buffer - @discardableResult mutating func read(until characterSet: Set, throwOnOverflow: Bool = true) throws -> HBParser { - let startIndex = index - while !reachedEnd() { - if characterSet.contains(unsafeCurrent()) { - return subParser(startIndex ..< index) - } - unsafeAdvance() - } - if throwOnOverflow { - _setPosition(startIndex) - throw Error.overflow - } - return subParser(startIndex ..< index) - } - - /// Read from buffer until we hit a character that returns true for supplied closure. Position after this is of the character we were checking for - /// - Parameter until: Function to test - /// - Throws: .overflow - /// - Returns: String read from buffer - @discardableResult mutating func read(until: (Unicode.Scalar) -> Bool, throwOnOverflow: Bool = true) throws -> HBParser { - let startIndex = index - while !reachedEnd() { - if until(unsafeCurrent()) { - return subParser(startIndex ..< index) - } - unsafeAdvance() - } - if throwOnOverflow { - _setPosition(startIndex) - throw Error.overflow - } - return subParser(startIndex ..< index) - } - - /// Read from buffer until we hit a character where supplied KeyPath is true. Position after this is of the character we were checking for - /// - Parameter characterSet: Unicode.Scalar set to check against - /// - Throws: .overflow - /// - Returns: String read from buffer - @discardableResult mutating func read(until keyPath: KeyPath, throwOnOverflow: Bool = true) throws -> HBParser { - let startIndex = index - while !reachedEnd() { - if unsafeCurrent()[keyPath: keyPath] { - return subParser(startIndex ..< index) - } - unsafeAdvance() - } - if throwOnOverflow { - _setPosition(startIndex) - throw Error.overflow - } - return subParser(startIndex ..< index) + return buffer[startIndex ..< position] } /// Read from buffer until we hit a string. By default the position after this is of the beginning of the string we were checking for @@ -220,52 +111,106 @@ extension HBParser { /// - Parameter skipToEnd: Should we set the position to after the found string /// - Throws: .overflow, .emptyString /// - Returns: String read from buffer - @discardableResult mutating func read(untilString: String, throwOnOverflow: Bool = true, skipToEnd: Bool = false) throws -> HBParser { - var untilString = untilString - return try untilString.withUTF8 { utf8 in - guard utf8.count > 0 else { throw Error.emptyString } - let startIndex = index - var foundIndex = index - var untilIndex = 0 - while !reachedEnd() { - if buffer[index] == utf8[untilIndex] { - if untilIndex == 0 { - foundIndex = index - } - untilIndex += 1 - if untilIndex == utf8.endIndex { - unsafeAdvance() - if skipToEnd == false { - index = foundIndex - } - let result = subParser(startIndex ..< foundIndex) - return result - } - } else { - untilIndex = 0 + @discardableResult mutating func read(untilString: String, throwOnOverflow: Bool = true, skipToEnd: Bool = false) throws -> Substring { + guard untilString.count > 0 else { return "" } + let startIndex = position + var foundIndex = position + var untilIndex = untilString.startIndex + while !reachedEnd() { + if unsafeCurrent() == untilString[untilIndex] { + if untilIndex == untilString.startIndex { + foundIndex = position } - index += 1 + untilIndex = untilString.index(after: untilIndex) + if untilIndex == untilString.endIndex { + unsafeAdvance() + if skipToEnd == false { + position = foundIndex + } + let result = buffer[startIndex ..< foundIndex] + return result + } + } else { + untilIndex = untilString.startIndex } - if throwOnOverflow { - _setPosition(startIndex) - throw Error.overflow - } - return subParser(startIndex ..< index) + unsafeAdvance() } + if throwOnOverflow { + position = startIndex + throw Error.overflow + } + return buffer[startIndex ..< position] + } + + /// Read from buffer until we hit a character in supplied set. Position after this is of the character we were checking for + /// - Parameter characterSet: Character set to check against + /// - Throws: .overflow + /// - Returns: String read from buffer + @discardableResult mutating func read(until characterSet: Set, throwOnOverflow: Bool = true) throws -> Substring { + let startIndex = position + while !reachedEnd() { + if characterSet.contains(unsafeCurrent()) { + return buffer[startIndex ..< position] + } + unsafeAdvance() + } + if throwOnOverflow { + unsafeSetPosition(startIndex) + throw HBParser.Error.overflow + } + return buffer[startIndex ..< position] + } + + /// Read from buffer until keyPath on character returns true. Position after this is of the character we were checking for + /// - Parameter keyPath: keyPath to check + /// - Throws: .overflow + /// - Returns: String read from buffer + @discardableResult mutating func read(until keyPath: KeyPath, throwOnOverflow: Bool = true) throws -> Substring { + let startIndex = position + while !reachedEnd() { + if current()[keyPath: keyPath] { + return buffer[startIndex ..< position] + } + unsafeAdvance() + } + if throwOnOverflow { + position = startIndex + throw Error.overflow + } + return buffer[startIndex ..< position] + } + + /// Read from buffer until keyPath on character returns true. Position after this is of the character we were checking for + /// - Parameter keyPath: keyPath to check + /// - Throws: .overflow + /// - Returns: String read from buffer + @discardableResult mutating func read(until cb: (Character) -> Bool, throwOnOverflow: Bool = true) throws -> Substring { + let startIndex = position + while !reachedEnd() { + if cb(current()) { + return buffer[startIndex ..< position] + } + unsafeAdvance() + } + if throwOnOverflow { + position = startIndex + throw Error.overflow + } + return buffer[startIndex ..< position] } /// Read from buffer from current position until the end of the buffer /// - Returns: String read from buffer - @discardableResult mutating func readUntilTheEnd() -> HBParser { - let startIndex = index - index = range.endIndex - return subParser(startIndex ..< index) + @discardableResult mutating func readUntilTheEnd() -> Substring { + let startIndex = position + position = buffer.endIndex + return buffer[startIndex ..< position] } /// Read while character at current position is the one supplied - /// - Parameter while: Unicode.Scalar to check against + /// - Parameter while: Character to check against /// - Returns: String read from buffer - @discardableResult mutating func read(while: Unicode.Scalar) -> Int { + @discardableResult mutating func read(while: Character) -> Int { var count = 0 while !reachedEnd(), unsafeCurrent() == `while` @@ -276,68 +221,74 @@ extension HBParser { return count } - /// Read while character at current position is in supplied set - /// - Parameter while: character set to check + /// Read while keyPath on character at current position returns true is the one supplied + /// - Parameter while: keyPath to check /// - Returns: String read from buffer - @discardableResult mutating func read(while characterSet: Set) -> HBParser { - let startIndex = index - while !reachedEnd(), - characterSet.contains(unsafeCurrent()) - { - unsafeAdvance() - } - return subParser(startIndex ..< index) - } - - /// Read while character returns true for supplied closure - /// - Parameter while: character set to check - /// - Returns: String read from buffer - @discardableResult mutating func read(while: (Unicode.Scalar) -> Bool) -> HBParser { - let startIndex = index - while !reachedEnd(), - `while`(unsafeCurrent()) - { - unsafeAdvance() - } - return subParser(startIndex ..< index) - } - - /// Read while character returns true for supplied KeyPath - /// - Parameter while: character set to check - /// - Returns: String read from buffer - @discardableResult mutating func read(while keyPath: KeyPath) -> HBParser { - let startIndex = index + @discardableResult mutating func read(while keyPath: KeyPath) -> Substring { + let startIndex = position while !reachedEnd(), unsafeCurrent()[keyPath: keyPath] { unsafeAdvance() } - return subParser(startIndex ..< index) + return buffer[startIndex ..< position] } - /// Split parser into sections separated by character - /// - Parameter separator: Separator character - /// - Returns: arrays of sub parsers - mutating func split(separator: Unicode.Scalar) -> [HBParser] { - var subParsers: [HBParser] = [] - while !reachedEnd() { - do { - let section = try read(until: separator) - subParsers.append(section) - unsafeAdvance() - } catch { - if !reachedEnd() { - subParsers.append(readUntilTheEnd()) - } - } + /// Read while character at current position is in supplied set + /// - Parameter while: character set to check + /// - Returns: String read from buffer + @discardableResult mutating func read(while characterSet: Set) -> Substring { + let startIndex = position + while !reachedEnd(), + characterSet.contains(unsafeCurrent()) + { + unsafeAdvance() } - return subParsers + return buffer[startIndex ..< position] } /// Return whether we have reached the end of the buffer /// - Returns: Have we reached the end func reachedEnd() -> Bool { - return index == range.endIndex + return position == buffer.endIndex + } + + /// Return whether we are at the start of the buffer + /// - Returns: Are we are the start + func atStart() -> Bool { + return position == buffer.startIndex + } +} + +extension HBParser { + public struct Context { + public let line: String + public let lineNumber: Int + public let columnNumber: Int + } + + /// Return context of current position (line, lineNumber, columnNumber) + func getContext() -> Context { + var parser = self + var columnNumber = 0 + while !parser.atStart() { + try? parser.retreat() + if parser.current() == "\n" { + break + } + columnNumber += 1 + } + if parser.current() == "\n" { + try? parser.advance() + } + // read line from parser + let line = try! parser.read(until: Character("\n"), throwOnOverflow: false) + // count new lines up to this current position + let buffer = parser.buffer + let textBefore = buffer[buffer.startIndex ..< position] + let lineNumber = textBefore.filter { $0.isNewline }.count + + return Context(line: String(line), lineNumber: lineNumber + 1, columnNumber: columnNumber + 1) } } @@ -345,325 +296,71 @@ extension HBParser { extension HBParser { /// Return the character at the current position /// - Throws: .overflow - /// - Returns: Unicode.Scalar - func current() -> Unicode.Scalar { - guard !reachedEnd() else { return Unicode.Scalar(0) } + /// - Returns: Character + func current() -> Character { + guard !reachedEnd() else { return "\0" } return unsafeCurrent() } /// Move forward one character /// - Throws: .overflow mutating func advance() throws { - guard !reachedEnd() else { throw Error.overflow } + guard !reachedEnd() else { throw HBParser.Error.overflow } return unsafeAdvance() } + /// Move back one character + /// - Throws: .overflow + mutating func retreat() throws { + guard position != buffer.startIndex else { throw HBParser.Error.overflow } + return unsafeRetreat() + } + /// Move forward so many character /// - Parameter amount: number of characters to move forward /// - Throws: .overflow mutating func advance(by amount: Int) throws { - var amount = amount - while amount > 0 { - guard !reachedEnd() else { throw Error.overflow } - index = skipUTF8Character(at: index) - amount -= 1 - } - } - - /// Move backwards one character - /// - Throws: .overflow - mutating func retreat() throws { - guard index > range.startIndex else { throw Error.overflow } - index = backOneUTF8Character(at: index) + guard buffer.distance(from: position, to: buffer.endIndex) >= amount else { throw HBParser.Error.overflow } + return unsafeAdvance(by: amount) } /// Move back so many characters /// - Parameter amount: number of characters to move back /// - Throws: .overflow mutating func retreat(by amount: Int) throws { - var amount = amount - while amount > 0 { - guard index > range.startIndex else { throw Error.overflow } - index = backOneUTF8Character(at: index) - amount -= 1 - } + guard buffer.distance(from: buffer.startIndex, to: position) >= amount else { throw HBParser.Error.overflow } + return unsafeRetreat(by: amount) + } + + mutating func setPosition(_ position: String.Index) throws { + guard position <= buffer.endIndex else { throw HBParser.Error.overflow } + unsafeSetPosition(position) + } +} + +// unsafe versions without checks +extension HBParser { + func unsafeCurrent() -> Character { + return buffer[position] } mutating func unsafeAdvance() { - index = skipUTF8Character(at: index) + position = buffer.index(after: position) + } + + mutating func unsafeRetreat() { + position = buffer.index(before: position) } mutating func unsafeAdvance(by amount: Int) { - var amount = amount - while amount > 0 { - index = skipUTF8Character(at: index) - amount -= 1 - } + position = buffer.index(position, offsetBy: amount) } - func getPosition() -> Int { - return index + mutating func unsafeRetreat(by amount: Int) { + position = buffer.index(position, offsetBy: -amount) } - mutating func setPosition(_ index: Int) throws { - if index == self.range.endIndex { - _setPosition(index) - return - } - guard range.contains(index) else { throw Error.invalidPosition } - guard validateUTF8Character(at: index).0 != nil else { throw Error.invalidPosition } - _setPosition(index) - } -} - -/// extend Parser to conform to Sequence -extension HBParser: Sequence { - typealias Element = Unicode.Scalar - - __consuming func makeIterator() -> Iterator { - return Iterator(self) - } - - struct Iterator: IteratorProtocol { - typealias Element = Unicode.Scalar - - var parser: HBParser - - init(_ parser: HBParser) { - self.parser = parser - } - - mutating func next() -> Unicode.Scalar? { - guard !parser.reachedEnd() else { return nil } - return parser.unsafeCurrentAndAdvance() - } - } -} - -// internal versions without checks -private extension HBParser { - func unsafeCurrent() -> Unicode.Scalar { - return decodeUTF8Character(at: index).0 - } - - mutating func unsafeCurrentAndAdvance() -> Unicode.Scalar { - let (unicodeScalar, index) = decodeUTF8Character(at: self.index) - self.index = index - return unicodeScalar - } - - mutating func _setPosition(_ index: Int) { - self.index = index - } - - func makeString(_ bytes: Bytes) -> String where Bytes.Element == UInt8, Bytes.Index == Int { - if let string = bytes.withContiguousStorageIfAvailable({ String(decoding: $0, as: Unicode.UTF8.self) }) { - return string - } else { - return String(decoding: bytes, as: Unicode.UTF8.self) - } - } -} - -// UTF8 parsing -extension HBParser { - func decodeUTF8Character(at index: Int) -> (Unicode.Scalar, Int) { - var index = index - let byte1 = UInt32(buffer[index]) - var value: UInt32 - if byte1 & 0xC0 == 0xC0 { - index += 1 - let byte2 = UInt32(buffer[index] & 0x3F) - if byte1 & 0xE0 == 0xE0 { - index += 1 - let byte3 = UInt32(buffer[index] & 0x3F) - if byte1 & 0xF0 == 0xF0 { - index += 1 - let byte4 = UInt32(buffer[index] & 0x3F) - value = (byte1 & 0x7) << 18 + byte2 << 12 + byte3 << 6 + byte4 - } else { - value = (byte1 & 0xF) << 12 + byte2 << 6 + byte3 - } - } else { - value = (byte1 & 0x1F) << 6 + byte2 - } - } else { - value = byte1 & 0x7F - } - let unicodeScalar = Unicode.Scalar(value)! - return (unicodeScalar, index + 1) - } - - func skipUTF8Character(at index: Int) -> Int { - if buffer[index] & 0x80 != 0x80 { return index + 1 } - if buffer[index + 1] & 0xC0 == 0x80 { return index + 2 } - if buffer[index + 2] & 0xC0 == 0x80 { return index + 3 } - return index + 4 - } - - func backOneUTF8Character(at index: Int) -> Int { - if buffer[index - 1] & 0xC0 != 0x80 { return index - 1 } - if buffer[index - 2] & 0xC0 != 0x80 { return index - 2 } - if buffer[index - 3] & 0xC0 != 0x80 { return index - 3 } - return index - 4 - } - - /// same as `decodeUTF8Character` but adds extra validation, so we can make assumptions later on in decode and skip - func validateUTF8Character(at index: Int) -> (Unicode.Scalar?, Int) { - var index = index - let byte1 = UInt32(buffer[index]) - var value: UInt32 - if byte1 & 0xC0 == 0xC0 { - index += 1 - let byte = UInt32(buffer[index]) - guard byte & 0xC0 == 0x80 else { return (nil, index) } - let byte2 = UInt32(byte & 0x3F) - if byte1 & 0xE0 == 0xE0 { - index += 1 - let byte = UInt32(buffer[index]) - guard byte & 0xC0 == 0x80 else { return (nil, index) } - let byte3 = UInt32(byte & 0x3F) - if byte1 & 0xF0 == 0xF0 { - index += 1 - let byte = UInt32(buffer[index]) - guard byte & 0xC0 == 0x80 else { return (nil, index) } - let byte4 = UInt32(byte & 0x3F) - value = (byte1 & 0x7) << 18 + byte2 << 12 + byte3 << 6 + byte4 - } else { - value = (byte1 & 0xF) << 12 + byte2 << 6 + byte3 - } - } else { - value = (byte1 & 0x1F) << 6 + byte2 - } - } else { - value = byte1 & 0x7F - } - let unicodeScalar = Unicode.Scalar(value) - return (unicodeScalar, index + 1) - } - - /// return if the buffer is valid UTF8 - func validateUTF8() -> Bool { - var index = range.startIndex - while index < range.endIndex { - let (scalar, newIndex) = validateUTF8Character(at: index) - guard scalar != nil else { return false } - index = newIndex - } - return true - } - - private static let asciiHexValues: [UInt8] = [ - /* 00 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 08 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 10 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 18 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 20 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 28 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 30 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - /* 38 */ 0x08, 0x09, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 40 */ 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80, - /* 48 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 50 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 58 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 60 */ 0x80, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x80, - /* 68 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 70 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 78 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - - /* 80 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 88 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 90 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* 98 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* A0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* A8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* B0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* B8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* C0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* C8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* D0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* D8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* E0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* E8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* F0 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - /* F8 */ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - ] - - /// percent decode UTF8 - func percentDecode() -> String? { - struct DecodeError: Swift.Error {} - func _percentDecode(_ original: ArraySlice, _ bytes: UnsafeMutableBufferPointer) throws -> Int { - var newIndex = 0 - var index = original.startIndex - - while index < original.endIndex { - // if we have found a percent sign - if original[index] == 0x25 { - let high = Self.asciiHexValues[Int(original[index + 1])] - let low = Self.asciiHexValues[Int(original[index + 2])] - index += 3 - if ((high | low) & 0x80) != 0 { - throw DecodeError() - } - bytes[newIndex] = (high << 4) | low - newIndex += 1 - } else { - bytes[newIndex] = original[index] - newIndex += 1 - index += 1 - } - } - return newIndex - } - - guard index != range.endIndex else { return "" } - do { - if #available(macOS 11, *) { - return try String(unsafeUninitializedCapacity: range.endIndex - index) { bytes -> Int in - try _percentDecode(self.buffer[self.index ..< range.endIndex], bytes) - } - } else { - let newBuffer = try [UInt8].init(unsafeUninitializedCapacity: range.endIndex - index) { bytes, count in - try count = _percentDecode(self.buffer[self.index ..< range.endIndex], bytes) - } - return makeString(newBuffer) - } - } catch { - return nil - } - } -} - -extension Unicode.Scalar { - var isWhitespace: Bool { - return properties.isWhitespace - } - - var isNewline: Bool { - switch value { - case 0x000A ... 0x000D /* LF ... CR */: return true - case 0x0085 /* NEXT LINE (NEL) */: return true - case 0x2028 /* LINE SEPARATOR */: return true - case 0x2029 /* PARAGRAPH SEPARATOR */: return true - default: return false - } - } - - var isNumber: Bool { - return properties.numericType != nil - } - - var isLetter: Bool { - return properties.isAlphabetic - } - - var isLetterOrNumber: Bool { - return isLetter || isNumber - } -} - -extension Set where Element == Unicode.Scalar { - init(_ string: String) { - self = Set(string.unicodeScalars) + mutating func unsafeSetPosition(_ position: String.Index) { + self.position = position } } diff --git a/Sources/HummingbirdMustache/Template+Parser.swift b/Sources/HummingbirdMustache/Template+Parser.swift index e13d8b2..a52292b 100644 --- a/Sources/HummingbirdMustache/Template+Parser.swift +++ b/Sources/HummingbirdMustache/Template+Parser.swift @@ -1,6 +1,11 @@ extension HBMustacheTemplate { - enum Error: Swift.Error { + public struct ParserError: Swift.Error { + public let context: HBParser.Context + public let error: Swift.Error + } + + public enum Error: Swift.Error { case sectionCloseNameIncorrect case unfinishedName case expectedSectionEnd @@ -46,23 +51,27 @@ extension HBMustacheTemplate { /// parse mustache text to generate a list of tokens static func parse(_ string: String) throws -> [Token] { var parser = HBParser(string) - return try parse(&parser, state: .init()) + do { + return try parse(&parser, state: .init()) + } catch { + throw ParserError(context: parser.getContext(), error: error) + } } /// parse section in mustache text static func parse(_ parser: inout HBParser, state: ParserState) throws -> [Token] { var tokens: [Token] = [] var state = state - var whiteSpaceBefore: String = "" + var whiteSpaceBefore: Substring = "" while !parser.reachedEnd() { // if new line read whitespace if state.newLine { - whiteSpaceBefore = parser.read(while: Set(" \t")).string + whiteSpaceBefore = parser.read(while: Set(" \t")) } let text = try readUntilDelimiterOrNewline(&parser, state: state) // if we hit a newline add text - if parser.current() == "\n" { - tokens.append(.text(whiteSpaceBefore + text + "\n")) + if parser.current().isNewline { + tokens.append(.text(whiteSpaceBefore + text + String(parser.current()))) state.newLine = true parser.unsafeAdvance() continue @@ -87,7 +96,7 @@ extension HBMustacheTemplate { if isStandalone(&parser, state: state) { setNewLine = true } else if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) whiteSpaceBefore = "" } let sectionTokens = try parse(&parser, state: state.withSectionName(name, method: method)) @@ -100,7 +109,7 @@ extension HBMustacheTemplate { if isStandalone(&parser, state: state) { setNewLine = true } else if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) whiteSpaceBefore = "" } let sectionTokens = try parse(&parser, state: state.withSectionName(name, method: method)) @@ -109,14 +118,16 @@ extension HBMustacheTemplate { case "/": // end of section parser.unsafeAdvance() + let position = parser.position let (name, method) = try parseName(&parser, state: state) guard name == state.sectionName, method == state.sectionMethod else { + parser.unsafeSetPosition(position) throw Error.sectionCloseNameIncorrect } if isStandalone(&parser, state: state) { setNewLine = true } else if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) whiteSpaceBefore = "" } return tokens @@ -130,7 +141,7 @@ extension HBMustacheTemplate { case "{": // unescaped variable if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) whiteSpaceBefore = "" } parser.unsafeAdvance() @@ -141,7 +152,7 @@ extension HBMustacheTemplate { case "&": // unescaped variable if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) whiteSpaceBefore = "" } parser.unsafeAdvance() @@ -153,11 +164,11 @@ extension HBMustacheTemplate { parser.unsafeAdvance() let (name, _) = try parseName(&parser, state: state) if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) } if isStandalone(&parser, state: state) { setNewLine = true - tokens.append(.partial(name, indentation: whiteSpaceBefore)) + tokens.append(.partial(name, indentation: String(whiteSpaceBefore))) } else { tokens.append(.partial(name, indentation: nil)) } @@ -172,7 +183,7 @@ extension HBMustacheTemplate { default: // variable if whiteSpaceBefore.count > 0 { - tokens.append(.text(whiteSpaceBefore)) + tokens.append(.text(String(whiteSpaceBefore))) whiteSpaceBefore = "" } let (name, method) = try parseName(&parser, state: state) @@ -189,24 +200,23 @@ extension HBMustacheTemplate { /// read until we hit either the start delimiter of a tag or a newline static func readUntilDelimiterOrNewline(_ parser: inout HBParser, state: ParserState) throws -> String { - var untilSet = Set("\n") - guard let delimiterFirstChar = state.startDelimiter.first, - let delimiterFirstScalar = delimiterFirstChar.unicodeScalars.first else { return "" } + var untilSet: Set = ["\n", "\r\n"] + guard let delimiterFirstChar = state.startDelimiter.first else { return "" } var totalText = "" - untilSet.insert(delimiterFirstScalar) + untilSet.insert(delimiterFirstChar) while !parser.reachedEnd() { // read until we hit either a newline or "{" - let text = try parser.read(until: untilSet, throwOnOverflow: false).string + let text = try parser.read(until: untilSet, throwOnOverflow: false) totalText += text // if new line append all text read plus newline - if parser.current() == "\n" { + if parser.current().isNewline { break - } else if parser.current() == delimiterFirstScalar { - if try parser.read(state.startDelimiter) { + } else if parser.current() == delimiterFirstChar { + if try parser.read(string: state.startDelimiter) { break } - totalText += String(delimiterFirstScalar) + totalText += String(delimiterFirstChar) parser.unsafeAdvance() } } @@ -216,59 +226,67 @@ extension HBMustacheTemplate { /// parse variable name static func parseName(_ parser: inout HBParser, state: ParserState) throws -> (String, String?) { parser.read(while: \.isWhitespace) - var text = parser.read(while: sectionNameChars) + let text = String(parser.read(while: sectionNameChars)) parser.read(while: \.isWhitespace) - guard try parser.read(state.endDelimiter) else { throw Error.unfinishedName } + guard try parser.read(string: state.endDelimiter) else { throw Error.unfinishedName } // does the name include brackets. If so this is a method call - let string = text.read(while: sectionNameCharsWithoutBrackets) - if text.reachedEnd() { - return (text.string, nil) + var nameParser = HBParser(String(text)) + let string = nameParser.read(while: sectionNameCharsWithoutBrackets) + if nameParser.reachedEnd() { + return (text, nil) } else { // parse function parameter, as we have just parsed a function name - guard text.current() == "(" else { throw Error.unfinishedName } - text.unsafeAdvance() - let string2 = text.read(while: sectionNameCharsWithoutBrackets) - guard text.current() == ")" else { throw Error.unfinishedName } - text.unsafeAdvance() - guard text.reachedEnd() else { throw Error.unfinishedName } - return (string2.string, string.string) + guard nameParser.current() == "(" else { throw Error.unfinishedName } + nameParser.unsafeAdvance() + let string2 = nameParser.read(while: sectionNameCharsWithoutBrackets) + guard nameParser.current() == ")" else { throw Error.unfinishedName } + nameParser.unsafeAdvance() + guard nameParser.reachedEnd() else { throw Error.unfinishedName } + return (String(string2), String(string)) } } static func parseComment(_ parser: inout HBParser, state: ParserState) throws -> String { let text = try parser.read(untilString: state.endDelimiter, throwOnOverflow: true, skipToEnd: true) - return text.string + return String(text) } static func parserSetDelimiter(_ parser: inout HBParser, state: ParserState) throws -> ParserState { - parser.read(while: \.isWhitespace) - let startDelimiter = try parser.read(until: \.isWhitespace).string - parser.read(while: \.isWhitespace) - let endDelimiter = try parser.read(until: { $0 == "=" || $0.isWhitespace }).string - parser.read(while: \.isWhitespace) + let startDelimiter: Substring + let endDelimiter: Substring + + do { + parser.read(while: \.isWhitespace) + startDelimiter = try parser.read(until: \.isWhitespace) + parser.read(while: \.isWhitespace) + endDelimiter = try parser.read(until: { $0 == "=" || $0.isWhitespace }) + parser.read(while: \.isWhitespace) + } catch { + throw Error.invalidSetDelimiter + } guard try parser.read("=") else { throw Error.invalidSetDelimiter } - guard try parser.read(state.endDelimiter) else { throw Error.invalidSetDelimiter } + guard try parser.read(string: state.endDelimiter) else { throw Error.invalidSetDelimiter } guard startDelimiter.count > 0, endDelimiter.count > 0 else { throw Error.invalidSetDelimiter } - return state.withDelimiters(start: startDelimiter, end: endDelimiter) + return state.withDelimiters(start: String(startDelimiter), end: String(endDelimiter)) } static func hasLineFinished(_ parser: inout HBParser) -> Bool { var parser2 = parser if parser.reachedEnd() { return true } - parser2.read(while: Set(" \t\r")) - if parser2.current() == "\n" { + parser2.read(while: Set(" \t")) + if parser2.current().isNewline { parser2.unsafeAdvance() - try! parser.setPosition(parser2.getPosition()) + try! parser.setPosition(parser2.position) return true } return false } - static func isStandalone(_ parser: inout HBParser, state: ParserState) -> Bool { + static func isStandalone(_ parser: inout HBParser, state: ParserState) -> Bool { return state.newLine && hasLineFinished(&parser) } - private static let sectionNameCharsWithoutBrackets = Set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?") - private static let sectionNameChars = Set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?()") + private static let sectionNameCharsWithoutBrackets = Set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?") + private static let sectionNameChars = Set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._?()") } diff --git a/Tests/HummingbirdMustacheTests/ErrorTests.swift b/Tests/HummingbirdMustacheTests/ErrorTests.swift new file mode 100644 index 0000000..aa7693a --- /dev/null +++ b/Tests/HummingbirdMustacheTests/ErrorTests.swift @@ -0,0 +1,79 @@ +import HummingbirdMustache +import XCTest + +final class ErrorTests: XCTestCase { + func testSectionCloseNameIncorrect() { + XCTAssertThrowsError(try HBMustacheTemplate(string: """ + {{#test}} + {{.}} + {{/test2}} + """)) { error in + switch error { + case let error as HBMustacheTemplate.ParserError: + XCTAssertEqual(error.error as? HBMustacheTemplate.Error, .sectionCloseNameIncorrect) + XCTAssertEqual(error.context.line, "{{/test2}}") + XCTAssertEqual(error.context.lineNumber, 3) + XCTAssertEqual(error.context.columnNumber, 4) + + default: + XCTFail("\(error)") + } + } + } + + func testUnfinishedName() { + XCTAssertThrowsError(try HBMustacheTemplate(string: """ + {{#test}} + {{name} + {{/test2}} + """)) { error in + switch error { + case let error as HBMustacheTemplate.ParserError: + XCTAssertEqual(error.error as? HBMustacheTemplate.Error, .unfinishedName) + XCTAssertEqual(error.context.line, "{{name}") + XCTAssertEqual(error.context.lineNumber, 2) + XCTAssertEqual(error.context.columnNumber, 7) + + default: + XCTFail("\(error)") + } + } + } + + func testExpectedSectionEnd() { + XCTAssertThrowsError(try HBMustacheTemplate(string: """ + {{#test}} + {{.}} + """)) { error in + switch error { + case let error as HBMustacheTemplate.ParserError: + XCTAssertEqual(error.error as? HBMustacheTemplate.Error, .expectedSectionEnd) + XCTAssertEqual(error.context.line, "{{.}}") + XCTAssertEqual(error.context.lineNumber, 2) + XCTAssertEqual(error.context.columnNumber, 6) + + default: + XCTFail("\(error)") + } + } + } + + func testInvalidSetDelimiter() { + XCTAssertThrowsError(try HBMustacheTemplate(string: """ + {{=<% %>=}} + <%.%> + <%={{}}=%> + """)) { error in + switch error { + case let error as HBMustacheTemplate.ParserError: + XCTAssertEqual(error.error as? HBMustacheTemplate.Error, .invalidSetDelimiter) + XCTAssertEqual(error.context.line, "<%={{}}=%>") + XCTAssertEqual(error.context.lineNumber, 3) + XCTAssertEqual(error.context.columnNumber, 4) + + default: + XCTFail("\(error)") + } + } + } +} diff --git a/Tests/HummingbirdMustacheTests/LibraryTests.swift b/Tests/HummingbirdMustacheTests/LibraryTests.swift index 3d591f6..3278ff6 100644 --- a/Tests/HummingbirdMustacheTests/LibraryTests.swift +++ b/Tests/HummingbirdMustacheTests/LibraryTests.swift @@ -5,14 +5,40 @@ final class LibraryTests: XCTestCase { func testDirectoryLoad() throws { let fs = FileManager() try? fs.createDirectory(atPath: "templates", withIntermediateDirectories: false) - let mustache = "{{#value}}{{.}}{{/value}}" - let data = Data(mustache.utf8) defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates")) } - try data.write(to: URL(fileURLWithPath: "templates/test.mustache")) + let mustache = Data("{{#value}}{{.}}{{/value}}".utf8) + try mustache.write(to: URL(fileURLWithPath: "templates/test.mustache")) defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates/test.mustache")) } let library = try HBMustacheLibrary(directory: "./templates") let object = ["value": ["value1", "value2"]] XCTAssertEqual(library.render(object, withTemplate: "test"), "value1value2") } + + func testLibraryParserError() throws { + let fs = FileManager() + try? fs.createDirectory(atPath: "templates", withIntermediateDirectories: false) + defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates")) } + let mustache = Data("{{#value}}{{.}}{{/value}}".utf8) + try mustache.write(to: URL(fileURLWithPath: "templates/test.mustache")) + defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates/test.mustache")) } + let mustache2 = Data(""" + {{#test}} + {{{name}} + {{/test2}} + """.utf8) + try mustache2.write(to: URL(fileURLWithPath: "templates/error.mustache")) + defer { XCTAssertNoThrow(try fs.removeItem(atPath: "templates/error.mustache")) } + + XCTAssertThrowsError(try HBMustacheLibrary(directory: "./templates")) { error in + guard let parserError = error as? HBMustacheLibrary.ParserError else { + XCTFail("\(error)") + return + } + XCTAssertEqual(parserError.filename, "error.mustache") + XCTAssertEqual(parserError.context.line, "{{{name}}") + XCTAssertEqual(parserError.context.lineNumber, 2) + XCTAssertEqual(parserError.context.columnNumber, 10) + } + } } diff --git a/Tests/HummingbirdMustacheTests/MethodTests.swift b/Tests/HummingbirdMustacheTests/MethodTests.swift index 880c3fb..1957abc 100644 --- a/Tests/HummingbirdMustacheTests/MethodTests.swift +++ b/Tests/HummingbirdMustacheTests/MethodTests.swift @@ -128,6 +128,12 @@ final class MethodTests: XCTestCase { """) } + func testListOutput() throws { + let object = [1, 2, 3, 4] + let template = try HBMustacheTemplate(string: "{{#.}}{{.}}{{^last()}}, {{/last()}}{{/.}}") + XCTAssertEqual(template.render(object), "1, 2, 3, 4") + } + func testDictionaryEnumerated() throws { let template = try HBMustacheTemplate(string: """ {{#enumerated(.)}}{{ key }} = {{ value }}{{/enumerated(.)}} diff --git a/Tests/HummingbirdMustacheTests/TemplateParserTests.swift b/Tests/HummingbirdMustacheTests/TemplateParserTests.swift index e122399..43a3535 100644 --- a/Tests/HummingbirdMustacheTests/TemplateParserTests.swift +++ b/Tests/HummingbirdMustacheTests/TemplateParserTests.swift @@ -31,39 +31,6 @@ final class TemplateParserTests: XCTestCase { let template = try HBMustacheTemplate(string: "{{ section }}") XCTAssertEqual(template.tokens, [.variable(name: "section")]) } - - func testSectionEndError() throws { - XCTAssertThrowsError(_ = try HBMustacheTemplate(string: "test {{#section}}")) { error in - switch error { - case HBMustacheTemplate.Error.expectedSectionEnd: - break - default: - XCTFail("\(error)") - } - } - } - - func testSectionCloseNameIncorrectError() throws { - XCTAssertThrowsError(_ = try HBMustacheTemplate(string: "test {{#section}}{{/error}}")) { error in - switch error { - case HBMustacheTemplate.Error.sectionCloseNameIncorrect: - break - default: - XCTFail("\(error)") - } - } - } - - func testUnmatchedNameError() throws { - XCTAssertThrowsError(_ = try HBMustacheTemplate(string: "test {{section#}}")) { error in - switch error { - case HBMustacheTemplate.Error.unfinishedName: - break - default: - XCTFail("\(error)") - } - } - } } extension HBMustacheTemplate: Equatable {