swiftpm-stencil/Sources/Stencil/Lexer.swift

import Foundation

// swiftlint:disable large_tuple
typealias Line = (content: String, number: UInt, range: Range<String.Index>)
/// Location in some content (text)
public typealias ContentLocation = (content: String, lineNumber: UInt, lineOffset: Int)
// swiftlint:enable large_tuple

struct Lexer {
	let templateName: String?
	let templateString: String
	let lines: [Line]

	/// The potential token start characters. In a template these appear after a
	/// `{` character, for example `{{`, `{%`, `{#`, ...
	private static let tokenChars: [Unicode.Scalar] = ["{", "%", "#"]

	/// The minimum length of a tag
	private static let tagLength = 2

	/// The token end characters, corresponding to their token start characters.
	/// For example, a variable token starts with `{{` and ends with `}}`
	private static let tokenCharMap: [Unicode.Scalar: Unicode.Scalar] = [
		"{": "}",
		"%": "%",
		"#": "#"
	]

	/// Characters controlling whitespace trimming behaviour
	private static let behaviourMap: [Character: WhitespaceBehaviour.Behaviour] = [
		"+": .keep,
		"-": .trim
	]

	init(templateName: String? = nil, templateString: String) {
		self.templateName = templateName
		self.templateString = templateString

		self.lines = zip(1..., templateString.components(separatedBy: .newlines)).compactMap { index, line in
			guard !line.isEmpty,
				let range = templateString.range(of: line) else { return nil }
			return (content: line, number: UInt(index), range)
		}
	}

	private func behaviour(string: String, tagLength: Int) -> WhitespaceBehaviour {
		let leftIndex = string.index(string.startIndex, offsetBy: tagLength, limitedBy: string.endIndex)
		let rightIndex = string.index(string.endIndex, offsetBy: -(tagLength + 1), limitedBy: string.startIndex)

		return WhitespaceBehaviour(
			leading: Self.behaviourMap[leftIndex.map { string[$0] } ?? " "] ?? .unspecified,
			trailing: Self.behaviourMap[rightIndex.map { string[$0] } ?? " "] ?? .unspecified
		)
	}

	/// Create a token that will be passed on to the parser, with the given
	/// content and a range. The content will be tested to see if it's a
	/// `variable`, a `block` or a `comment`, otherwise it'll default to a simple
	/// `text` token.
	///
	/// - Parameters:
	///		- string: The content string of the token
	///		- range: The range within the template content, used for smart
	///						 error reporting
	func createToken(string: String, at range: Range<String.Index>) -> Token {
		func strip(length: (Int, Int) = (Self.tagLength, Self.tagLength)) -> String {
			guard string.count > (length.0 + length.1) else { return "" }
			let trimmed = String(string.dropFirst(length.0).dropLast(length.1))
				.components(separatedBy: "\n")
				.filter { !$0.isEmpty }
				.map { $0.trim(character: " ") }
				.joined(separator: " ")
			return trimmed
		}

		if string.hasPrefix("{{") || string.hasPrefix("{%") || string.hasPrefix("{#") {
			let behaviour = string.hasPrefix("{%") ? behaviour(string: string, tagLength: Self.tagLength) : .unspecified
			let stripLengths = (
				Self.tagLength + (behaviour.leading != .unspecified ? 1 : 0),
				Self.tagLength + (behaviour.trailing != .unspecified ? 1 : 0)
			)

			let value = strip(length: stripLengths)
			let range = templateString.range(of: value, range: range) ?? range
			let location = rangeLocation(range)
			let sourceMap = SourceMap(filename: templateName, location: location)

			if string.hasPrefix("{{") {
				return .variable(value: value, at: sourceMap)
			} else if string.hasPrefix("{%") {
				return .block(value: strip(length: stripLengths), at: sourceMap, whitespace: behaviour)
			} else if string.hasPrefix("{#") {
				return .comment(value: value, at: sourceMap)
			}
		}

		let location = rangeLocation(range)
		let sourceMap = SourceMap(filename: templateName, location: location)
		return .text(value: string, at: sourceMap)
	}

	/// Transforms the template into a list of tokens, that will eventually be
	/// passed on to the parser.
	///
	/// - Returns: The list of tokens (see `createToken(string: at:)`).
	func tokenize() -> [Token] {
		var tokens: [Token] = []

		let scanner = Scanner(templateString)
		while !scanner.isEmpty {
			if let (char, text) = scanner.scanForTokenStart(Self.tokenChars) {
				if !text.isEmpty {
					tokens.append(createToken(string: text, at: scanner.range))
				}

				guard let end = Self.tokenCharMap[char] else { continue }
				let result = scanner.scanForTokenEnd(end)
				tokens.append(createToken(string: result, at: scanner.range))
			} else {
				tokens.append(createToken(string: scanner.content, at: scanner.range))
				scanner.content = ""
			}
		}

		return tokens
	}

	/// Finds the line matching the given range (for a token)
	///
	/// - Parameter range: The range to search for.
	/// - Returns: The content for that line, the line number and offset within
	///						 the line.
	func rangeLocation(_ range: Range<String.Index>) -> ContentLocation {
		guard let line = self.lines.first(where: { $0.range.contains(range.lowerBound) }) else {
			return ("", 0, 0)
		}
		let offset = templateString.distance(from: line.range.lowerBound, to: range.lowerBound)
		return (line.content, line.number, offset)
	}
}

class Scanner {
	let originalContent: String
	var content: String
	var range: Range<String.UnicodeScalarView.Index>

	/// The start delimiter for a token.
	private static let tokenStartDelimiter: Unicode.Scalar = "{"
	/// And the corresponding end delimiter for a token.
	private static let tokenEndDelimiter: Unicode.Scalar = "}"

	init(_ content: String) {
		self.originalContent = content
		self.content = content
		range = content.unicodeScalars.startIndex..<content.unicodeScalars.startIndex
	}

	var isEmpty: Bool {
		content.isEmpty
	}

	/// Scans for the end of a token, with a specific ending character. If we're
	/// searching for the end of a block token `%}`, this method receives a `%`.
	/// The scanner will search for that `%` followed by a `}`.
	///
	/// Note: if the end of a token is found, the `content` and `range`
	/// properties are updated to reflect this. `content` will be set to what
	/// remains of the template after the token. `range` will be set to the range
	/// of the token within the template.
	///
	/// - Parameter tokenChar: The token end character to search for.
	/// - Returns: The content of a token, or "" if no token end was found.
	func scanForTokenEnd(_ tokenChar: Unicode.Scalar) -> String {
		var foundChar = false

		for (index, char) in zip(0..., content.unicodeScalars) {
			if foundChar && char == Self.tokenEndDelimiter {
				let result = String(content.unicodeScalars.prefix(index + 1))
				content = String(content.unicodeScalars.dropFirst(index + 1))
				range = range.upperBound..<originalContent.unicodeScalars.index(range.upperBound, offsetBy: index + 1)
				return result
			} else {
				foundChar = (char == tokenChar)
			}
		}

		content = ""
		return ""
	}

	/// Scans for the start of a token, with a list of potential starting
	/// characters. To scan for the start of variables (`{{`), blocks (`{%`) and
	/// comments (`{#`), this method receives the characters `{`, `%` and `#`.
	/// The scanner will search for a `{`, followed by one of the search
	/// characters. It will give the found character, and the content that came
	/// before the token.
	///
	/// Note: if the start of a token is found, the `content` and `range`
	/// properties are updated to reflect this. `content` will be set to what
	/// remains of the template starting with the token. `range` will be set to
	/// the start of the token within the template.
	///
	/// - Parameter tokenChars: List of token start characters to search for.
	/// - Returns: The found token start character, together with the content
	///						 before the token, or nil of no token start was found.
	func scanForTokenStart(_ tokenChars: [Unicode.Scalar]) -> (Unicode.Scalar, String)? {
		var foundBrace = false

		range = range.upperBound..<range.upperBound
		for (index, char) in zip(0..., content.unicodeScalars) {
			if foundBrace && tokenChars.contains(char) {
				let result = String(content.unicodeScalars.prefix(index - 1))
				content = String(content.unicodeScalars.dropFirst(index - 1))
				range = range.upperBound..<originalContent.unicodeScalars.index(range.upperBound, offsetBy: index - 1)
				return (char, result)
			} else {
				foundBrace = (char == Self.tokenStartDelimiter)
			}
		}

		return nil
	}
}

extension String {
	func findFirstNot(character: Character) -> String.Index? {
		var index = startIndex

		while index != endIndex {
			if character != self[index] {
				return index
			}
			index = self.index(after: index)
		}

		return nil
	}

	func findLastNot(character: Character) -> String.Index? {
		var index = self.index(before: endIndex)

		while index != startIndex {
			if character != self[index] {
				return self.index(after: index)
			}
			index = self.index(before: index)
		}

		return nil
	}

	func trim(character: Character) -> String {
		let first = findFirstNot(character: character) ?? startIndex
		let last = findLastNot(character: character) ?? endIndex
		return String(self[first..<last])
	}
}