Merge pull request #226 from Liquidsoul/faster-scanner
Optimise Scanner performance
@@ -40,6 +40,11 @@
 - Update to Spectre 0.9.0.
   [Ilya Puchka](https://github.com/ilyapuchka)
   [#247](https://github.com/stencilproject/Stencil/pull/247)
+- Optimise Scanner performance.
+  [Eric Thorpe](https://github.com/trametheka)
+  [Sébastien Duperron](https://github.com/Liquidsoul)
+  [David Jennes](https://github.com/djbe)
+  [#226](https://github.com/stencilproject/Stencil/pull/226)
 
 
 ## 0.12.1
@@ -7,6 +7,18 @@ struct Lexer {
   let templateString: String
   let lines: [Line]
 
+  /// The potential token start characters. In a template these appear after a
+  /// `{` character, for example `{{`, `{%`, `{#`, ...
+  private static let tokenChars: [Unicode.Scalar] = ["{", "%", "#"]
+
+  /// The token end characters, corresponding to their token start characters.
+  /// For example, a variable token starts with `{{` and ends with `}}`
+  private static let tokenCharMap: [Unicode.Scalar: Unicode.Scalar] = [
+    "{": "}",
+    "%": "%",
+    "#": "#"
+  ]
+
   init(templateName: String? = nil, templateString: String) {
     self.templateName = templateName
     self.templateString = templateString
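The two constants above drive the rewritten lexer: `tokenChars` lists the characters that may follow a `{` to open a token, and `tokenCharMap` pairs each of them with the character that, followed by `}`, closes that token. A minimal standalone sketch of how the pairing is meant to be read (not part of the diff):

```swift
// Standalone sketch: "{{" closes with "}}", "{%" with "%}", "{#" with "#}".
let tokenCharMap: [Unicode.Scalar: Unicode.Scalar] = ["{": "}", "%": "%", "#": "#"]

let opening: Unicode.Scalar = "%"          // the lexer just saw "{%"
if let end = tokenCharMap[opening] {
  // The scanner now looks for `end` immediately followed by "}".
  print("token ends at the next \(end)}")  // token ends at the next %}
}
```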
@@ -17,12 +29,19 @@ struct Lexer {
     }
   }
 
+  /// Create a token that will be passed on to the parser, with the given
+  /// content and a range. The content will be tested to see if it's a
+  /// `variable`, a `block` or a `comment`, otherwise it'll default to a simple
+  /// `text` token.
+  ///
+  /// - Parameters:
+  ///   - string: The content string of the token
+  ///   - range: The range within the template content, used for smart
+  ///            error reporting
   func createToken(string: String, at range: Range<String.Index>) -> Token {
     func strip() -> String {
       guard string.count > 4 else { return "" }
-      let start = string.index(string.startIndex, offsetBy: 2)
-      let end = string.index(string.endIndex, offsetBy: -2)
-      let trimmed = String(string[start..<end])
+      let trimmed = String(string.dropFirst(2).dropLast(2))
         .components(separatedBy: "\n")
         .filter({ !$0.isEmpty })
         .map({ $0.trim(character: " ") })
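The `strip()` change swaps manual index arithmetic for `dropFirst(2).dropLast(2)`, which removes the two-character delimiters from either end of the raw token text before trimming. A rough standalone illustration of the effect (the trailing join and Stencil's internal `trim(character:)` helper are outside this hunk, so standard library stand-ins are used here):

```swift
import Foundation

// "{{ article.title }}" -> "article.title"
let raw = "{{ article.title }}"
let value = String(raw.dropFirst(2).dropLast(2))    // " article.title "
  .components(separatedBy: "\n")
  .filter { !$0.isEmpty }
  .map { $0.trimmingCharacters(in: .whitespaces) }  // stand-in for Stencil's trim(character: " ")
  .joined(separator: " ")
print(value)                                        // article.title
```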
@@ -50,26 +69,22 @@ struct Lexer {
     return .text(value: string, at: sourceMap)
   }
 
-  /// Returns an array of tokens from a given template string.
+  /// Transforms the template into a list of tokens, that will eventually be
+  /// passed on to the parser.
+  ///
+  /// - Returns: The list of tokens (see `createToken(string: at:)`).
   func tokenize() -> [Token] {
     var tokens: [Token] = []
 
     let scanner = Scanner(templateString)
 
-    let map = [
-      "{{": "}}",
-      "{%": "%}",
-      "{#": "#}",
-    ]
-
     while !scanner.isEmpty {
-      if let text = scanner.scan(until: ["{{", "{%", "{#"]) {
-        if !text.1.isEmpty {
-          tokens.append(createToken(string: text.1, at: scanner.range))
+      if let (char, text) = scanner.scanForTokenStart(Lexer.tokenChars) {
+        if !text.isEmpty {
+          tokens.append(createToken(string: text, at: scanner.range))
         }
 
-        let end = map[text.0]!
-        let result = scanner.scan(until: end, returnUntil: true)
+        guard let end = Lexer.tokenCharMap[char] else { continue }
+        let result = scanner.scanForTokenEnd(end)
         tokens.append(createToken(string: result, at: scanner.range))
       } else {
         tokens.append(createToken(string: scanner.content, at: scanner.range))
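With these changes, `tokenize()` alternates between `scanForTokenStart` (emitting any plain text that precedes a token) and `scanForTokenEnd` (emitting the token itself). A small usage sketch, assuming `@testable import Stencil` since `Lexer` is an internal type:

```swift
@testable import Stencil

let lexer = Lexer(templateString: "Hello {{ name }}! {% if admin %}Welcome back.{% endif %}")
let tokens = lexer.tokenize()
// Expected shape (roughly): text("Hello "), variable("name"), text("! "),
// block("if admin"), text("Welcome back."), block("endif")
for token in tokens {
  print(token)
}
```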
@@ -80,6 +95,11 @@ struct Lexer {
     return tokens
   }
 
+  /// Finds the line matching the given range (for a token)
+  ///
+  /// - Parameter range: The range to search for.
+  /// - Returns: The content for that line, the line number and offset within
+  ///            the line.
   func rangeLocation(_ range: Range<String.Index>) -> ContentLocation {
     guard let line = self.lines.first(where: { $0.range.contains(range.lowerBound) }) else {
       return ("", 0, 0)
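`rangeLocation(_:)` is what turns a token's range back into a human-readable position for error reporting. A usage sketch (again assuming `@testable import Stencil`; the tuple labels of `ContentLocation` are not shown in this hunk, so positional destructuring is used):

```swift
@testable import Stencil

let lexer = Lexer(templateString: "Hello\n{{ name }}\n")
if let range = lexer.templateString.range(of: "name") {
  // The diff shows ContentLocation as a 3-tuple: line content, line number, offset.
  let (lineContent, lineNumber, offset) = lexer.rangeLocation(range)
  print(lineContent, lineNumber, offset)
}
```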
@@ -95,6 +115,11 @@ class Scanner {
   var content: String
   var range: Range<String.Index>
 
+  /// The start delimiter for a token.
+  private static let tokenStartDelimiter: Unicode.Scalar = "{"
+  /// And the corresponding end delimiter for a token.
+  private static let tokenEndDelimiter: Unicode.Scalar = "}"
+
   init(_ content: String) {
     self.originalContent = content
     self.content = content
@@ -105,64 +130,69 @@ class Scanner {
     return content.isEmpty
   }
 
-  func scan(until: String, returnUntil: Bool = false) -> String {
-    var index = content.startIndex
-
-    if until.isEmpty {
-      return ""
-    }
-
-    range = range.upperBound..<range.upperBound
-    while index != content.endIndex {
-      let substring = String(content[index...])
-
-      if substring.hasPrefix(until) {
-        let result = String(content[..<index])
-
-        if returnUntil {
-          range = range.lowerBound..<originalContent.index(range.upperBound, offsetBy: until.count)
-          content = String(substring[until.endIndex...])
-          return result + until
-        }
-
-        content = substring
+  /// Scans for the end of a token, with a specific ending character. If we're
+  /// searching for the end of a block token `%}`, this method receives a `%`.
+  /// The scanner will search for that `%` followed by a `}`.
+  ///
+  /// Note: if the end of a token is found, the `content` and `range`
+  /// properties are updated to reflect this. `content` will be set to what
+  /// remains of the template after the token. `range` will be set to the range
+  /// of the token within the template.
+  ///
+  /// - Parameter tokenChar: The token end character to search for.
+  /// - Returns: The content of a token, or "" if no token end was found.
+  func scanForTokenEnd(_ tokenChar: Unicode.Scalar) -> String {
+    var foundChar = false
+
+    for (index, char) in content.unicodeScalars.enumerated() {
+      if foundChar && char == Scanner.tokenEndDelimiter {
+        let result = String(content.prefix(index))
+        content = String(content.dropFirst(index + 1))
+        range = range.upperBound..<originalContent.index(range.upperBound, offsetBy: index + 1)
         return result
+      } else {
+        foundChar = (char == tokenChar)
       }
-
-      index = content.index(after: index)
-      range = range.lowerBound..<originalContent.index(after: range.upperBound)
     }
 
     content = ""
     return ""
   }
 
-  func scan(until: [String]) -> (String, String)? {
-    if until.isEmpty {
-      return nil
-    }
-
-    var index = content.startIndex
+  /// Scans for the start of a token, with a list of potential starting
+  /// characters. To scan for the start of variables (`{{`), blocks (`{%`) and
+  /// comments (`{#`), this method receives the characters `{`, `%` and `#`.
+  /// The scanner will search for a `{`, followed by one of the search
+  /// characters. It will give the found character, and the content that came
+  /// before the token.
+  ///
+  /// Note: if the start of a token is found, the `content` and `range`
+  /// properties are updated to reflect this. `content` will be set to what
+  /// remains of the template starting with the token. `range` will be set to
+  /// the start of the token within the template.
+  ///
+  /// - Parameter tokenChars: List of token start characters to search for.
+  /// - Returns: The found token start character, together with the content
+  ///            before the token, or nil if no token start was found.
+  func scanForTokenStart(_ tokenChars: [Unicode.Scalar]) -> (Unicode.Scalar, String)? {
+    var foundBrace = false
+
     range = range.upperBound..<range.upperBound
-    while index != content.endIndex {
-      let substring = String(content[index...])
-      for string in until {
-        if substring.hasPrefix(string) {
-          let result = String(content[..<index])
-          content = substring
-          return (string, result)
-        }
+    for (index, char) in content.unicodeScalars.enumerated() {
+      if foundBrace && tokenChars.contains(char) {
+        let result = String(content.prefix(index - 1))
+        content = String(content.dropFirst(index - 1))
+        range = range.upperBound..<originalContent.index(range.upperBound, offsetBy: index - 1)
+        return (char, result)
+      } else {
+        foundBrace = (char == Scanner.tokenStartDelimiter)
       }
-
-      index = content.index(after: index)
-      range = range.lowerBound..<originalContent.index(after: range.upperBound)
     }
 
     return nil
   }
 }
 
 
 extension String {
   func findFirstNot(character: Character) -> String.Index? {
     var index = startIndex
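This hunk is the heart of the performance change. The old `scan(until:)` methods built a fresh `String` from the remaining content and called `hasPrefix` at every position, so scanning a long template re-allocated substrings over and over. The replacements walk `content.unicodeScalars` once and only remember whether the previous scalar was a delimiter. A simplified, standalone sketch of that single-pass idea (a hypothetical helper; the real methods also update `content` and `range` as described in the doc comments above):

```swift
// Simplified, standalone version of the scanForTokenStart idea: one pass over
// the scalars, tracking whether the previous scalar was "{".
func findTokenStart(in content: String, tokenChars: [Unicode.Scalar]) -> (char: Unicode.Scalar, textBefore: String)? {
  var foundBrace = false
  for (index, char) in content.unicodeScalars.enumerated() {
    if foundBrace && tokenChars.contains(char) {
      // index - 1 is the position of the "{" that opened the token.
      // (Like the diffed code, this treats scalar offsets as character offsets.)
      return (char, String(content.prefix(index - 1)))
    } else {
      foundBrace = (char == "{")
    }
  }
  return nil
}

// findTokenStart(in: "Hello {{ name }}", tokenChars: ["{", "%", "#"])
// returns ("{", "Hello ")
```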
@@ -1,10 +1,16 @@
-import XCTest
+import PathKit
 import Spectre
 @testable import Stencil
+import XCTest
 
 class LexerTests: XCTestCase {
   func testLexer() {
     describe("Lexer") {
+      func makeSourceMap(_ token: String, for lexer: Lexer, options: String.CompareOptions = []) -> SourceMap {
+        guard let range = lexer.templateString.range(of: token, options: options) else { fatalError("Token not found") }
+        return SourceMap(location: lexer.rangeLocation(range))
+      }
+
       $0.it("can tokenize text") {
         let lexer = Lexer(templateString: "Hello World")
         let tokens = lexer.tokenize()
@@ -44,9 +50,9 @@ class LexerTests: XCTestCase {
         let tokens = lexer.tokenize()
 
         try expect(tokens.count) == 3
-        try expect(tokens[0]) == Token.text(value: "My name is ", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "My name is ")!)))
-        try expect(tokens[1]) == Token.variable(value: "myname", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "myname")!)))
-        try expect(tokens[2]) == Token.text(value: ".", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: ".")!)))
+        try expect(tokens[0]) == Token.text(value: "My name is ", at: makeSourceMap("My name is ", for: lexer))
+        try expect(tokens[1]) == Token.variable(value: "myname", at: makeSourceMap("myname", for: lexer))
+        try expect(tokens[2]) == Token.text(value: ".", at: makeSourceMap(".", for: lexer))
       }
 
       $0.it("can tokenize two variables without being greedy") {
@@ -55,18 +61,23 @@ class LexerTests: XCTestCase {
         let tokens = lexer.tokenize()
 
         try expect(tokens.count) == 2
-        try expect(tokens[0]) == Token.variable(value: "thing", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "thing")!)))
-        try expect(tokens[1]) == Token.variable(value: "name", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "name")!)))
+        try expect(tokens[0]) == Token.variable(value: "thing", at: makeSourceMap("thing", for: lexer))
+        try expect(tokens[1]) == Token.variable(value: "name", at: makeSourceMap("name", for: lexer))
       }
 
       $0.it("can tokenize an unclosed block") {
         let lexer = Lexer(templateString: "{%}")
-        let _ = lexer.tokenize()
+        _ = lexer.tokenize()
+      }
+
+      $0.it("can tokenize incorrect syntax without crashing") {
+        let lexer = Lexer(templateString: "func some() {{% if %}")
+        _ = lexer.tokenize()
       }
 
       $0.it("can tokenize an empty variable") {
         let lexer = Lexer(templateString: "{{}}")
-        let _ = lexer.tokenize()
+        _ = lexer.tokenize()
       }
 
       $0.it("can tokenize with new lines") {
@@ -80,18 +91,39 @@ class LexerTests: XCTestCase {
         }}{%
         endif %}.
         """
 
         let lexer = Lexer(templateString: templateString)
 
         let tokens = lexer.tokenize()
 
         try expect(tokens.count) == 5
-        try expect(tokens[0]) == Token.text(value: "My name is ", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "My name is")!)))
-        try expect(tokens[1]) == Token.block(value: "if name and name", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "{%")!)))
-        try expect(tokens[2]) == Token.variable(value: "name", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "name", options: [.backwards])!)))
-        try expect(tokens[3]) == Token.block(value: "endif", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "endif")!)))
-        try expect(tokens[4]) == Token.text(value: ".", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: ".")!)))
+        try expect(tokens[0]) == Token.text(value: "My name is ", at: makeSourceMap("My name is", for: lexer))
+        try expect(tokens[1]) == Token.block(value: "if name and name", at: makeSourceMap("{%", for: lexer))
+        try expect(tokens[2]) == Token.variable(value: "name", at: makeSourceMap("name", for: lexer, options: .backwards))
+        try expect(tokens[3]) == Token.block(value: "endif", at: makeSourceMap("endif", for: lexer))
+        try expect(tokens[4]) == Token.text(value: ".", at: makeSourceMap(".", for: lexer))
+      }
+
+      $0.it("can tokenize escape sequences") {
+        let templateString = "class Some {{ '{' }}{% if true %}{{ stuff }}{% endif %}"
+        let lexer = Lexer(templateString: templateString)
+        let tokens = lexer.tokenize()
+
+        try expect(tokens.count) == 5
+        try expect(tokens[0]) == Token.text(value: "class Some ", at: makeSourceMap("class Some ", for: lexer))
+        try expect(tokens[1]) == Token.variable(value: "'{'", at: makeSourceMap("'{'", for: lexer))
+        try expect(tokens[2]) == Token.block(value: "if true", at: makeSourceMap("if true", for: lexer))
+        try expect(tokens[3]) == Token.variable(value: "stuff", at: makeSourceMap("stuff", for: lexer))
+        try expect(tokens[4]) == Token.block(value: "endif", at: makeSourceMap("endif", for: lexer))
       }
     }
   }
 
+  func testPerformance() throws {
+    let path = Path(#file) + ".." + "fixtures" + "huge.html"
+    let content: String = try path.read()
+
+    measure {
+      let lexer = Lexer(templateString: content)
+      _ = lexer.tokenize()
+    }
+  }
 }
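The new `testPerformance` case runs the lexer over the added `huge.html` fixture inside XCTest's `measure` block, which executes the closure repeatedly and reports an average, giving the scanner rewrite a regression benchmark. A rough standalone timing sketch of the same idea (assumes `@testable import Stencil` and a local copy of the fixture; the real test resolves the path with PathKit and uses `measure` instead of manual timing):

```swift
import Foundation
@testable import Stencil

// Rough manual timing; the real test uses XCTest's measure { } instead.
let fixturePath = "Tests/StencilTests/fixtures/huge.html"  // assumed local path
if let content = try? String(contentsOfFile: fixturePath, encoding: .utf8) {
  let start = Date()
  _ = Lexer(templateString: content).tokenize()
  print("tokenize() took \(Date().timeIntervalSince(start)) seconds")
}
```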
@@ -57,6 +57,7 @@ extension InheritenceTests {
 extension LexerTests {
   static let __allTests = [
     ("testLexer", testLexer),
+    ("testPerformance", testPerformance),
   ]
 }
 
Tests/StencilTests/fixtures/huge.html (new file, 1131 lines): file diff suppressed because it is too large.