diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ce06c8..9645027 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -40,6 +40,11 @@
 - Update to Spectre 0.9.0.
   [Ilya Puchka](https://github.com/ilyapuchka)
   [#247](https://github.com/stencilproject/Stencil/pull/247)
+- Optimise Scanner performance.
+  [Eric Thorpe](https://github.com/trametheka)
+  [Sébastien Duperron](https://github.com/Liquidsoul)
+  [David Jennes](https://github.com/djbe)
+  [#226](https://github.com/stencilproject/Stencil/pull/226)
 
 ## 0.12.1
 
diff --git a/Sources/Lexer.swift b/Sources/Lexer.swift
index 26d6a1a..015b7d7 100644
--- a/Sources/Lexer.swift
+++ b/Sources/Lexer.swift
@@ -7,6 +7,18 @@ struct Lexer {
   let templateString: String
   let lines: [Line]
 
+  /// The potential token start characters. In a template these appear after a
+  /// `{` character, for example `{{`, `{%`, `{#`, ...
+  private static let tokenChars: [Unicode.Scalar] = ["{", "%", "#"]
+
+  /// The token end characters, corresponding to their token start characters.
+  /// For example, a variable token starts with `{{` and ends with `}}`
+  private static let tokenCharMap: [Unicode.Scalar: Unicode.Scalar] = [
+    "{": "}",
+    "%": "%",
+    "#": "#"
+  ]
+
   init(templateName: String? = nil, templateString: String) {
     self.templateName = templateName
     self.templateString = templateString
@@ -17,12 +29,19 @@ struct Lexer {
     }
   }
 
+  /// Create a token that will be passed on to the parser, with the given
+  /// content and a range. The content will be tested to see if it's a
+  /// `variable`, a `block` or a `comment`, otherwise it'll default to a simple
+  /// `text` token.
+  ///
+  /// - Parameters:
+  ///   - string: The content string of the token
+  ///   - range: The range within the template content, used for smart
+  ///            error reporting
   func createToken(string: String, at range: Range<String.Index>) -> Token {
     func strip() -> String {
       guard string.count > 4 else { return "" }
-      let start = string.index(string.startIndex, offsetBy: 2)
-      let end = string.index(string.endIndex, offsetBy: -2)
-      let trimmed = String(string[start..<end])
 
   func tokenize() -> [Token] {
     var tokens: [Token] = []
 
     let scanner = Scanner(templateString)
-
-    let map = [
-      "{{": "}}",
-      "{%": "%}",
-      "{#": "#}",
-    ]
-
     while !scanner.isEmpty {
-      if let text = scanner.scan(until: ["{{", "{%", "{#"]) {
-        if !text.1.isEmpty {
-          tokens.append(createToken(string: text.1, at: scanner.range))
+      if let (char, text) = scanner.scanForTokenStart(Lexer.tokenChars) {
+        if !text.isEmpty {
+          tokens.append(createToken(string: text, at: scanner.range))
         }
-        let end = map[text.0]!
-        let result = scanner.scan(until: end, returnUntil: true)
+        guard let end = Lexer.tokenCharMap[char] else { continue }
+        let result = scanner.scanForTokenEnd(end)
         tokens.append(createToken(string: result, at: scanner.range))
       } else {
         tokens.append(createToken(string: scanner.content, at: scanner.range))
@@ -80,6 +95,11 @@ struct Lexer {
     return tokens
   }
 
+  /// Finds the line matching the given range (for a token)
+  ///
+  /// - Parameter range: The range to search for.
+  /// - Returns: The content for that line, the line number and offset within
+  ///            the line.
   func rangeLocation(_ range: Range<String.Index>) -> ContentLocation {
     guard let line = self.lines.first(where: { $0.range.contains(range.lowerBound) }) else {
       return ("", 0, 0)
@@ -95,6 +115,11 @@ class Scanner {
   var content: String
   var range: Range<String.Index>
 
+  /// The start delimiter for a token.
+  private static let tokenStartDelimiter: Unicode.Scalar = "{"
+  /// And the corresponding end delimiter for a token.
+  private static let tokenEndDelimiter: Unicode.Scalar = "}"
+
   init(_ content: String) {
     self.originalContent = content
     self.content = content
@@ -105,64 +130,69 @@ class Scanner {
     return content.isEmpty
   }
 
-  func scan(until: String, returnUntil: Bool = false) -> String {
-    var index = content.startIndex
+  /// Scans for the end of a token, with a specific ending character. If we're
+  /// searching for the end of a block token `%}`, this method receives a `%`.
+  /// The scanner will search for that `%` followed by a `}`.
+  ///
+  /// Note: if the end of a token is found, the `content` and `range`
+  /// properties are updated to reflect this. `content` will be set to what
+  /// remains of the template after the token. `range` will be set to the range
+  /// of the token within the template.
+  ///
+  /// - Parameter tokenChar: The token end character to search for.
+  /// - Returns: The content of a token, or "" if no token end was found.
+  func scanForTokenEnd(_ tokenChar: Unicode.Scalar) -> String {
+    var foundChar = false
 
-    if until.isEmpty {
-      return ""
-    }
-
-    range = range.upperBound..<range.upperBound
-  func scan(until: [String]) -> (String, String)? {
-    if until.isEmpty {
-      return nil
-    }
+  /// Scans for the start of a token, with a list of potential starting
+  /// characters. To scan for the start of variables (`{{`), blocks (`{%`) and
+  /// comments (`{#`), this method receives the characters `{`, `%` and `#`.
+  /// The scanner will search for a `{`, followed by one of the search
+  /// characters. It will give the found character, and the content that came
+  /// before the token.
+  ///
+  /// Note: if the start of a token is found, the `content` and `range`
+  /// properties are updated to reflect this. `content` will be set to what
+  /// remains of the template starting with the token. `range` will be set to
+  /// the start of the token within the template.
+  ///
+  /// - Parameter tokenChars: List of token start characters to search for.
+  /// - Returns: The found token start character, together with the content
+  ///            before the token, or nil if no token start was found.
+  func scanForTokenStart(_ tokenChars: [Unicode.Scalar]) -> (Unicode.Scalar, String)? {
+    var foundBrace = false
 
-    var index = content.startIndex
     range = range.upperBound..<range.upperBound

[Note: the new bodies of scanForTokenEnd(_:) and scanForTokenStart(_:), together with a small helper returning String.Index? that was added alongside them, did not survive in this capture of the patch; see PR #226 for the complete implementation.]
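The doc comments above describe the core of the optimisation: rather than repeatedly substring-matching `{{`, `{%` and `{#` against the remaining template, the scanner now makes a single pass over the template's unicode scalars, looking for a `{` followed by one of the token start characters. The sketch below illustrates that idea in isolation; it is not Stencil's actual `Scanner`, and the function name and return shape are invented for illustration.

```swift
/// Sketch only: walk the template once and return the character that
/// identifies the next token ("{" for variables, "%" for blocks, "#" for
/// comments) together with the offset of the "{" that opens the delimiter.
func nextTokenStart(in template: String) -> (kind: Unicode.Scalar, offset: Int)? {
  let tokenChars: Set<Unicode.Scalar> = ["{", "%", "#"]
  var previousWasBrace = false

  for (offset, scalar) in template.unicodeScalars.enumerated() {
    if previousWasBrace && tokenChars.contains(scalar) {
      // `offset - 1` is the position of the `{` that starts `{{`, `{%` or `{#`.
      return (kind: scalar, offset: offset - 1)
    }
    previousWasBrace = scalar == "{"
  }
  return nil
}

if let start = nextTokenStart(in: "Hello {% if admin %}!") {
  print(start.kind, start.offset)   // "%" at offset 6
}
```

The real `scanForTokenStart(_:)` additionally updates the scanner's `content` and `range` as the comments describe, and `scanForTokenEnd(_:)` performs the mirror-image search for the token character followed by the closing `}`.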
diff --git a/Tests/StencilTests/LexerSpec.swift b/Tests/StencilTests/LexerSpec.swift
index 6f49a4c..ebc114e 100644
--- a/Tests/StencilTests/LexerSpec.swift
+++ b/Tests/StencilTests/LexerSpec.swift
@@ -1,10 +1,16 @@
-import XCTest
+import PathKit
 import Spectre
 @testable import Stencil
+import XCTest
 
 class LexerTests: XCTestCase {
   func testLexer() {
     describe("Lexer") {
+      func makeSourceMap(_ token: String, for lexer: Lexer, options: String.CompareOptions = []) -> SourceMap {
+        guard let range = lexer.templateString.range(of: token, options: options) else { fatalError("Token not found") }
+        return SourceMap(location: lexer.rangeLocation(range))
+      }
+
       $0.it("can tokenize text") {
         let lexer = Lexer(templateString: "Hello World")
         let tokens = lexer.tokenize()
@@ -44,9 +50,9 @@ class LexerTests: XCTestCase {
         let tokens = lexer.tokenize()
 
         try expect(tokens.count) == 3
-        try expect(tokens[0]) == Token.text(value: "My name is ", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "My name is ")!)))
-        try expect(tokens[1]) == Token.variable(value: "myname", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "myname")!)))
-        try expect(tokens[2]) == Token.text(value: ".", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: ".")!)))
+        try expect(tokens[0]) == Token.text(value: "My name is ", at: makeSourceMap("My name is ", for: lexer))
+        try expect(tokens[1]) == Token.variable(value: "myname", at: makeSourceMap("myname", for: lexer))
+        try expect(tokens[2]) == Token.text(value: ".", at: makeSourceMap(".", for: lexer))
       }
 
       $0.it("can tokenize two variables without being greedy") {
@@ -55,43 +61,69 @@ class LexerTests: XCTestCase {
         let tokens = lexer.tokenize()
 
         try expect(tokens.count) == 2
-        try expect(tokens[0]) == Token.variable(value: "thing", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "thing")!)))
-        try expect(tokens[1]) == Token.variable(value: "name", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "name")!)))
+        try expect(tokens[0]) == Token.variable(value: "thing", at: makeSourceMap("thing", for: lexer))
+        try expect(tokens[1]) == Token.variable(value: "name", at: makeSourceMap("name", for: lexer))
       }
 
       $0.it("can tokenize an unclosed block") {
         let lexer = Lexer(templateString: "{%}")
-        let _ = lexer.tokenize()
+        _ = lexer.tokenize()
+      }
+
+      $0.it("can tokenize incorrect syntax without crashing") {
+        let lexer = Lexer(templateString: "func some() {{% if %}")
+        _ = lexer.tokenize()
       }
 
      $0.it("can tokenize an empty variable") {
         let lexer = Lexer(templateString: "{{}}")
-        let _ = lexer.tokenize()
+        _ = lexer.tokenize()
       }
 
       $0.it("can tokenize with new lines") {
         let templateString = """
-        My name is {%
-          if name
-          and
-          name
-        %}{{
-          name
-        }}{%
-          endif %}.
-        """
-
+          My name is {%
+            if name
+            and
+            name
+          %}{{
+            name
+          }}{%
+            endif %}.
+          """
         let lexer = Lexer(templateString: templateString)
-
         let tokens = lexer.tokenize()
 
         try expect(tokens.count) == 5
-        try expect(tokens[0]) == Token.text(value: "My name is ", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "My name is")!)))
-        try expect(tokens[1]) == Token.block(value: "if name and name", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "{%")!)))
-        try expect(tokens[2]) == Token.variable(value: "name", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "name", options: [.backwards])!)))
-        try expect(tokens[3]) == Token.block(value: "endif", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: "endif")!)))
-        try expect(tokens[4]) == Token.text(value: ".", at: SourceMap(location: lexer.rangeLocation(templateString.range(of: ".")!)))
+        try expect(tokens[0]) == Token.text(value: "My name is ", at: makeSourceMap("My name is", for: lexer))
+        try expect(tokens[1]) == Token.block(value: "if name and name", at: makeSourceMap("{%", for: lexer))
+        try expect(tokens[2]) == Token.variable(value: "name", at: makeSourceMap("name", for: lexer, options: .backwards))
+        try expect(tokens[3]) == Token.block(value: "endif", at: makeSourceMap("endif", for: lexer))
+        try expect(tokens[4]) == Token.text(value: ".", at: makeSourceMap(".", for: lexer))
+      }
+
+      $0.it("can tokenize escape sequences") {
+        let templateString = "class Some {{ '{' }}{% if true %}{{ stuff }}{% endif %}"
+        let lexer = Lexer(templateString: templateString)
+        let tokens = lexer.tokenize()
+
+        try expect(tokens.count) == 5
+        try expect(tokens[0]) == Token.text(value: "class Some ", at: makeSourceMap("class Some ", for: lexer))
+        try expect(tokens[1]) == Token.variable(value: "'{'", at: makeSourceMap("'{'", for: lexer))
+        try expect(tokens[2]) == Token.block(value: "if true", at: makeSourceMap("if true", for: lexer))
+        try expect(tokens[3]) == Token.variable(value: "stuff", at: makeSourceMap("stuff", for: lexer))
+        try expect(tokens[4]) == Token.block(value: "endif", at: makeSourceMap("endif", for: lexer))
+      }
     }
   }
+
+  func testPerformance() throws {
+    let path = Path(#file) + ".." + "fixtures" + "huge.html"
+    let content: String = try path.read()
+
+    measure {
+      let lexer = Lexer(templateString: content)
+      _ = lexer.tokenize()
+    }
+  }
 }
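The new `testPerformance` above runs the lexer over the bundled `huge.html` fixture, loaded via PathKit, inside an XCTest `measure` block. The same kind of measurement can be made without any fixture by synthesising a large template; the following is only a sketch (the repeated chunk and the class name are invented, and only `Lexer` and `tokenize()` come from the diff above).

```swift
import XCTest
@testable import Stencil

final class LexerSyntheticBenchmark: XCTestCase {
  func testTokenizeLargeSyntheticTemplate() {
    // Hypothetical workload: a small template chunk repeated many times.
    let chunk = "{% if user %}Hello {{ user.name }}!{% else %}Hello guest.{% endif %}\n"
    let template = String(repeating: chunk, count: 5_000)

    // XCTest runs this block several times and reports the average duration.
    measure {
      let lexer = Lexer(templateString: template)
      _ = lexer.tokenize()
    }
  }
}
```

Once the fixture-based test is part of the test target it can be run on its own with `swift test --filter LexerTests`.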
diff --git a/Tests/StencilTests/XCTestManifests.swift b/Tests/StencilTests/XCTestManifests.swift
index 84f6cce..73cf026 100644
--- a/Tests/StencilTests/XCTestManifests.swift
+++ b/Tests/StencilTests/XCTestManifests.swift
@@ -57,6 +57,7 @@ extension InheritenceTests {
 extension LexerTests {
     static let __allTests = [
         ("testLexer", testLexer),
+        ("testPerformance", testPerformance),
     ]
 }
 
diff --git a/Tests/StencilTests/fixtures/huge.html b/Tests/StencilTests/fixtures/huge.html
new file mode 100644
index 0000000..c338ed6
--- /dev/null
+++ b/Tests/StencilTests/fixtures/huge.html
@@ -0,0 +1,1131 @@
[huge.html: a new 1,131-line HTML fixture used by testPerformance. It is a page from the Dutch "Rond De Tafel" site rendered as a Stencil template, full of tags such as {% block title %}, {% if sort == "new" %}, {{ block.super }}, {% for activity in activities %} and {{ activity.name }}, with the same page markup repeated to pad the file. The raw HTML was stripped when this diff was captured, so the fixture body is summarised here rather than reproduced.]
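The `XCTestManifests.swift` entry is what makes `testPerformance` visible on Linux, where corelibs-xctest cannot discover test methods at runtime and instead relies on these generated `__allTests` arrays. Roughly, the generated manifest and `LinuxMain.swift` fit together as sketched below (this is the standard `swift test --generate-linuxmain` pattern; neither snippet is part of this diff, and the repository's own files may differ).

```swift
import XCTest

// XCTestManifests.swift (same test module) typically ends with a block like
// this, which gathers the per-class __allTests arrays:
#if !canImport(ObjectiveC)
public func __allTests() -> [XCTestCaseEntry] {
  return [
    testCase(LexerTests.__allTests)
    // ... entries for the other test classes ...
  ]
}
#endif

// LinuxMain.swift then runs everything that was registered:
//   import XCTest
//   import StencilTests
//   XCTMain(__allTests())
```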