/* -----------------------------------------------------------------------------
Copyright 2023-2024 Kevin P. Barry

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------------------------------------------------------------------------- */

// Author: Kevin P. Barry [ta0kira@gmail.com]

define ZeoliteWhitespace {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    optional Char current <- input.current()
    while (`present` current && CharType.whitespace(`require` current)) {
      current <- input.forward().current()
    }
    scoped {
      String content <- input.take()
    } in if (content.size() > 0) {
      token <- ZeoliteParsed.leaf(label: tokenizerName(), content: content)
    }
  }
}

define ZeoliteLineComment {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    \ input.forward().forward()
    if (input.preview() != "//") {
      return _
    }
    optional Char current <- input.current()
    while (`present` current && !CharType.oneOf(`require` current, "\r\n")) {
      current <- input.forward().current()
    }
    token <- ZeoliteParsed.leaf(label: tokenizerName(), content: input.take())
  }
}

define ZeoliteBlockComment {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    \ input.forward().forward()
    if (input.preview() != "/*") {
      return _
    }
    scoped {
      optional Char current <- empty
      Bool starFound <- false
    } in while (`present` (current <- input.current())) {
      if (starFound && `require` current == '/') {
        \ input.forward()
        break
      } else {
        starFound <- `require` current == '*'
      }
    } update {
      \ input.forward()
    }
    token <- ZeoliteParsed.leaf(label: tokenizerName(), content: input.take())
  }
}

define ZeoliteUpperSymbol {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, _) {
    optional String content <- ZeoliteSymbol.parseUpperSymbol(input)&.take()
    if (! `present` content) {
      return empty
    } else {
      String type <- ZeoliteSymbol.tryBuiltinCategory(`require` content) <|| "ZeoliteCategoryName"
      return ZeoliteParsed.leaf(label: type, content: `require` content)
    }
  }
}
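
// Lower-case words are the densest case: something like "return" or "optional"
// could fall into any of several keyword classes. The chain below tries each
// keyword table in turn, with <|| keeping the first non-empty result, and
// falls through to a plain function/variable name.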

define ZeoliteLowerSymbol {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, _) {
    optional String content <- ZeoliteSymbol.parseLowerSymbol(input)&.preview()
    if (! `present` content) {
      return empty
    } else {
      String type <- ZeoliteSymbol.tryControlKeyword(`require` content) <||
                     ZeoliteSymbol.tryTypeKeyword(`require` content) <||
                     ZeoliteSymbol.tryContainKeyword(`require` content) <||
                     ZeoliteSymbol.tryStorageKeyword(`require` content) <||
                     ZeoliteSymbol.tryScopeQualifier(`require` content) <||
                     ZeoliteSymbol.tryBuiltinCategory(`require` content) <||
                     ZeoliteSymbol.tryBuiltinFunction(`require` content) <||
                     ZeoliteSymbol.tryBuiltinConstant(`require` content) <||
                     tryLabel(input) <||
                     "ZeoliteFunctionOrVariableName"
      $Hidden[content]$
      return ZeoliteParsed.leaf(label: type, content: input.take())
    }
  }

  @type tryLabel (TextStream) -> (optional String)
  tryLabel (input) {
    if (`present` input.current() && `require` input.current() == ':') {
      \ input.forward()
      return "ZeoliteArgLabel"
    } else {
      return empty
    }
  }
}

define ZeoliteScopeQualifier {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, _) {
    optional String content <- ZeoliteSymbol.parseScopeQualifier(input)&.take()
    if (! `present` content) {
      return empty
    } else {
      String type <- ZeoliteSymbol.tryScopeQualifier(`require` content) <|| "ZeoliteError"
      return ZeoliteParsed.leaf(label: type, content: `require` content)
    }
  }
}

define ZeoliteOperator {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, _) {
    optional String content <- ZeoliteSymbol.parseOperatorSymbols(input)&.take()
    if (! `present` content) {
      return empty
    } else {
      String type <- ZeoliteSymbol.tryAssignment(`require` content) <||
                     ZeoliteSymbol.tryFunctionCall(`require` content) <||
                     "ZeoliteOperator"
      return ZeoliteParsed.leaf(label: type, content: `require` content)
    }
  }
}

define ZeoliteParamName {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) {
    optional String content <- ZeoliteSymbol.parseParamName(input)&.take()
    if (! `present` content) {
      return empty
    } else {
      String type <- ZeoliteSymbol.tryBuiltinParam(`require` content) <|| "ZeoliteParamName"
      return ZeoliteParsed.leaf(label: type, content: `require` content)
    }
  }
}

concrete ZeoliteTestcaseKeyword {
  defines Default
  refines Tokenizer
}

define ZeoliteTestcaseKeyword {
  default () {
    return delegate -> #self
  }

  tokenize (input, context) (token) {
    token <- empty
    scoped {
      optional String content <- ZeoliteSymbol.parseLowerSymbol(input)&.preview()
    } in if (! `present` content) {
      return empty
    } else {
      optional String type <- ZeoliteSymbol.tryTestcaseKeyword(`require` content)
      $Hidden[content]$
      if (`present` type) {
        token <- ZeoliteParsed.leaf(label: `require` type, content: input.take())
      }
    }
  }
}
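
// String literals are parsed as a section rather than a single leaf so that
// escape sequences can be labeled separately from the quoted characters
// around them. A malformed escape becomes a ZeoliteError leaf instead of
// aborting the whole token.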

define ZeoliteStringLiteral {
  $ReadOnlyExcept[]$

  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    if (input.atEnd() || `require` input.current() != '"') {
      return _
    }
    Vector<ZeoliteParsed> subsections <- Vector<ZeoliteParsed>.new()
    // Setting this early still allows building it incrementally while also
    // allowing an early return if something is missing.
    token <- ZeoliteParsed.section(label: tokenizerName(), subsections)
    \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteDoubleQuote", content: input.forward().take()))
    scoped {
      optional Char current <- empty
    } in while (!input.atEnd() && `present` (current <- input.current())) {
      if (`require` current == '\"') {
        \ addCurrentChars(input, subsections)
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteDoubleQuote", content: input.forward().take()))
        return _
      } elif (`require` current == '\\') {
        \ addCurrentChars(input, subsections)
        if (`present` ZeoliteSymbol.parseEscapedChar(input.forward())) {
          \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteEscapedChar", content: input.take()))
        } else {
          \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteError", content: input.take()))
        }
      } else {
        \ input.forward()
      }
    }
    \ addCurrentChars(input, subsections)
  }

  @type addCurrentChars (TextStream, Append<ZeoliteParsed>) -> ()
  addCurrentChars (input, output) {
    if (input.tokenSize() > 0) {
      \ output.append(ZeoliteParsed.leaf(label: "ZeoliteQuotedChars", content: input.take()))
    }
  }
}

define ZeoliteCharLiteral {
  $ReadOnlyExcept[]$

  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    if (input.atEnd() || `require` input.current() != '\'') {
      return _
    }
    Vector<ZeoliteParsed> subsections <- Vector<ZeoliteParsed>.new()
    // Setting this early still allows building it incrementally while also
    // allowing an early return if something is missing.
    token <- ZeoliteParsed.section(label: tokenizerName(), subsections)
    \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteSingleQuote", content: input.forward().take()))
    optional Char current <- input.current()
    if (! `present` current) {
      return _
    } elif (`require` current == '\\') {
      if (`present` ZeoliteSymbol.parseEscapedChar(input.forward())) {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteEscapedChar", content: input.take()))
      } else {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteError", content: input.take()))
      }
    } elif (`require` current == '\'') {
      \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteError", content: input.forward().take()))
    } else {
      \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteQuotedChars", content: input.forward().take()))
    }
    if (`present` (current <- input.current())) {
      if (`require` current == '\'') {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteSingleQuote", content: input.forward().take()))
      } else {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteError", content: input.forward().take()))
      }
    }
  }
}
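
// Numeric literals come in two forms: plain decimal numbers (with optional
// sign, fraction, and exponent) and escaped numbers such as \b, \o, \d, and
// \x, where the escape selects the base allowed for the digits that follow.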

define ZeoliteNumber {
  $ReadOnlyExcept[]$

  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    optional Char current <- input.current()
    if (! `present` current) {
      return _
    } elif (CharType.digit(`require` current)) {
      token <- parseNumber(input, escaped: false, CharType.decChars())
    } elif (`require` current `CharType.oneOf` "-+") {
      \ input.forward()
      token <- parseNumber(input, escaped: false, CharType.decChars())
    } elif (`require` current != '\\') {
      return _
    } else {
      current <- input.forward().current()
      if (! `present` current) {
        return _
      } elif (`require` current `CharType.oneOf` "bB") {
        \ input.forward()
        token <- parseNumber(input, escaped: true, CharType.binChars())
      } elif (`require` current `CharType.oneOf` "oO") {
        \ input.forward()
        token <- parseNumber(input, escaped: true, CharType.octChars())
      } elif (`require` current `CharType.oneOf` "dD") {
        \ input.forward()
        token <- parseNumber(input, escaped: true, CharType.decChars())
      } elif (`require` current `CharType.oneOf` "xX") {
        \ input.forward()
        token <- parseNumber(input, escaped: true, CharType.hexChars())
      } else {
        return _
      }
    }
  }

  @type parseNumber (TextStream, Bool escaped:, SetReader<Char>) -> (optional ZeoliteParsed)
  parseNumber (input, escaped, allowed) {
    Bool isEmpty <- true
    optional Char current <- empty
    while (`present` (current <- input.current()) && `allowed.member` `require` current) {
      \ input.forward()
      isEmpty <- false
    }
    if (isEmpty) {
      return empty
    } elif (`present` (current <- input.current()) && `require` current == '.') {
      \ input.forward()
      while (`present` (current <- input.current()) && `allowed.member` `require` current) {
        \ input.forward()
      }
    }
    if (escaped) {
      return ZeoliteParsed.leaf(label: "ZeoliteEscapedNumber", content: input.take())
    } elif (`present` current && `require` current `CharType.oneOf` "eE") {
      \ input.forward()
      if (`present` (current <- input.current()) && `require` current `CharType.oneOf` "-+") {
        \ input.forward()
      }
      while (`present` (current <- input.current()) && `allowed.member` `require` current) {
        \ input.forward()
      }
    }
    return ZeoliteParsed.leaf(label: "ZeoliteNumber", content: input.take())
  }
}

define ZeoliteBraceSection {
  tokenizer () {
    return ZeoliteDelimSection.new(name: tokenizerName(), open: '{', close: '}', empty)
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }
}

define ZeoliteParenSection {
  tokenizer () {
    return ZeoliteDelimSection.new(name: tokenizerName(), open: '(', close: ')', empty)
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }
}

define ZeoliteSquareSection {
  tokenizer () {
    return ZeoliteDelimSection.new(name: tokenizerName(), open: '[', close: ']', empty)
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }
}
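
// Pragmas have the form $Name$ or $Name[arg]$. The bracketed argument is
// captured verbatim as ZeolitePragmaArg, and an unexpected character where
// the closing '$' should be is labeled as a ZeoliteError leaf.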

define ZeolitePragma {
  $ReadOnlyExcept[]$

  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    if (input.atEnd() || `require` input.current() != '$') {
      return _
    }
    Vector<ZeoliteParsed> subsections <- Vector<ZeoliteParsed>.new()
    token <- ZeoliteParsed.section(label: tokenizerName(), subsections)
    \ subsections.append(ZeoliteParsed.leaf(label: "ZeolitePragmaDelim", content: input.forward().take()))
    if (! `present` ZeoliteSymbol.parseUpperSymbol(input)) {
      return _
    } else {
      \ subsections.append(ZeoliteParsed.leaf(label: "ZeolitePragmaName", content: input.take()))
    }
    optional Char current <- input.current()
    if (! `present` current) {
      return _
    } elif (`require` current == '[') {
      \ subsections.append(ZeoliteParsed.leaf(label: "ZeolitePragmaArgOpen", content: input.forward().take()))
      while (`present` (current <- input.current()) && `require` current != ']') {
        \ input.forward()
      }
      \ subsections.append(ZeoliteParsed.leaf(label: "ZeolitePragmaArg", content: input.take()))
      if (`present` (current <- input.current())) {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeolitePragmaArgClose", content: input.forward().take()))
      } else {
        return _
      }
    }
    if (`present` (current <- input.current())) {
      if (`require` current == '$') {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeolitePragmaDelim", content: input.forward().take()))
      } else {
        \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteError", content: input.forward().take()))
      }
    }
  }
}

define ZeoliteExtras {
  refines Tokenizer

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    \ input.reset()
    optional Char current <- input.current()
    if (! `present` current) {
      return empty
    } elif (`require` current == '_') {
      \ input.forward()
      return ZeoliteParsed.leaf(label: "ZeoliteIgnore", content: input.take())
    } elif (`require` current == '\\') {
      \ input.forward()
      return ZeoliteParsed.leaf(label: "ZeoliteDiscard", content: input.take())
    } elif (`require` current == '`') {
      \ input.forward()
      return ZeoliteParsed.leaf(label: "ZeoliteTick", content: input.take())
    } elif (`require` current == ',') {
      \ input.forward()
      return ZeoliteParsed.leaf(label: "ZeoliteComma", content: input.take())
    } elif (`require` current `CharType.oneOf` ";#@") {
      \ input.forward()
      return ZeoliteParsed.leaf(label: "ZeoliteError", content: input.take())
    } else {
      return empty
    }
  }
}
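
// testcase sections use a restricted token set, so ZeoliteTestcase parses the
// "testcase" keyword and the description string itself and then hands the
// braced body to the dedicated testcaseSpecs alternatives rather than the
// context's default tokenizer.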

define ZeoliteTestcase {
  $ReadOnlyExcept[]$

  refines Tokenizer

  @category Tokenizer testcaseSpecs <- TokenAlternatives.new()
      .append(ZeoliteOptionalSeparator.default())
      .append(UseNamedTokenizer.new())
      .append(UseNamedTokenizer.new())
      .append(ZeoliteTestcaseKeyword.default())
      .append(UseNamedTokenizer.new())
      .append(UseNamedTokenizer.new())
      .append(UseNamedTokenizer.new())
      .append(UseNamedTokenizer.new())

  tokenizer () {
    return delegate -> #self
  }

  tokenizerName () {
    return typename<#self>().formatted()
  }

  tokenize (input, context) (token) {
    scoped {
      optional String content <- ZeoliteSymbol.parseLowerSymbol(input)&.preview()
    } in if (! `present` content || `require` content != "testcase") {
      return empty
    }
    Vector<ZeoliteParsed> subsections <- Vector<ZeoliteParsed>.new()
    // Setting this early still allows building it incrementally while also
    // allowing an early return if something is missing.
    token <- ZeoliteParsed.section(label: tokenizerName(), subsections)
    \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteContainKeyword", content: input.take()))
    \ ZeoliteOptionalSeparator.parseAny(input, context, subsections)
    // Testcase description string.
    scoped {
      optional ZeoliteParsed name <- delegate -> `ZeoliteStringLiteral.tokenizer().tokenize`
    } in if (`present` name) {
      \ subsections.append(`require` name)
    } else {
      return _
    }
    \ ZeoliteOptionalSeparator.parseAny(input, context, subsections)
    // Testcase specs.
    \ StreamTokenizer:new(context: context, tokenizer: ZeoliteDelimSection.new(name: "ZeoliteTestcaseSection", open: '{', close: '}', testcaseSpecs))
        .tokenizeAll(input, subsections)
  }
}

concrete ZeoliteOptionalSeparator {
  defines Default
  refines Tokenizer

  visibility Tokenizer

  @type parseAny (TextStream, ZeoliteParseContext, Append<ZeoliteParsed>) -> ()
}

define ZeoliteOptionalSeparator {
  $ReadOnlyExcept[]$

  @category Tokenizer tokenizer <- TokenAlternatives.new()
      .append(UseNamedTokenizer.new())
      .append(UseNamedTokenizer.new())
      .append(UseNamedTokenizer.new())

  parseAny (input, context, output) {
    \ StreamTokenizer:new(context: context, tokenizer: default()).tokenizeAll(input, output)
  }

  default () {
    return delegate -> #self
  }

  tokenize (input, context) {
    return delegate -> `tokenizer.tokenize`
  }
}

concrete ZeoliteDelimSection {
  @type new (String name:, Char open:, Char close:, optional Tokenizer) -> (Tokenizer)
}

define ZeoliteDelimSection {
  $ReadOnlyExcept[]$

  refines Tokenizer

  @value String name
  @value Char open
  @value Char close
  @value optional Tokenizer tokenizer

  new (name, open, close, tokenizer) {
    return delegate -> #self
  }

  tokenize (input, context) (token) {
    \ input.reset()
    token <- empty
    if (input.atEnd() || `require` input.current() != open) {
      return _
    }
    Vector<ZeoliteParsed> subsections <- Vector<ZeoliteParsed>.new()
    token <- ZeoliteParsed.section(label: name, subsections)
    \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteOpenDelim", content: input.forward().take()))
    \ StreamTokenizer:new(context: context, tokenizer: tokenizer <|| context.defaultTokenizer()).tokenizeAll(input, subsections)
    \ input.reset()
    if (!input.atEnd() && `require` input.current() == close) {
      \ subsections.append(ZeoliteParsed.leaf(label: "ZeoliteCloseDelim", content: input.forward().take()))
    }
  }
}
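
// NOTE: A minimal sketch of how these tokenizers are expected to compose.
// The driver and the full alternative list live outside this file, so the
// wiring below is an assumption based only on the calls made above, and the
// name "highlighter" is hypothetical.
//
//   Tokenizer highlighter <- TokenAlternatives.new()
//       .append(ZeoliteWhitespace.tokenizer())
//       .append(ZeoliteLineComment.tokenizer())
//       .append(ZeoliteBlockComment.tokenizer())
//       ...
//   \ StreamTokenizer:new(context: context, tokenizer: highlighter)
//       .tokenizeAll(input, output)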