From 4cb8ae3f80d94a892ee079c0d98e02138fe77496 Mon Sep 17 00:00:00 2001 From: Daniel Bulant Date: Sat, 17 Feb 2024 20:45:03 +0100 Subject: [PATCH] some progress, broken --- package.json | 13 ++- src/surrealql.grammar | 246 ++++++++++++++++++++++++--------------- src/surrealql.ts | 9 +- src/tokens.ts | 262 ++++++++++++++++++++++++++++++++++++++++++ test.js | 4 +- 5 files changed, 435 insertions(+), 99 deletions(-) create mode 100644 src/tokens.ts diff --git a/package.json b/package.json index 5a1e91b..8fb6295 100644 --- a/package.json +++ b/package.json @@ -4,19 +4,30 @@ "description": "SurrealQL language support for CodeMirror 6", "main": "dist/index.cjs", "type": "module", + "author": { + "name": "Daniel Bulant", + "email": "/@danbulant.eu", + "url": "https://danbulant.eu" + }, + "repository": { + "type": "git", + "url": "https://github.com/danbulant/lang-surrealql.git" + }, + "homepage": "https://github.com/danbulant/lang-surrealql", "exports": { "import": "./dist/index.js", "require": "./dist/index.cjs" }, "scripts": { "test": "cm-runtests", - "prepare": "cm-buildhelper src/surrealql.ts" + "prepare": "lezer-generator src/surrealql.grammar -o src/surrealql.grammar && cm-buildhelper src/surrealql.ts" }, "types": "dist/index.d.ts", "module": "dist/index.js", "sideEffects": false, "license": "MIT", "devDependencies": { + "@lezer/generator": "^1.0.0", "@codemirror/buildhelper": "^1.0.0" }, "dependencies": { diff --git a/src/surrealql.grammar b/src/surrealql.grammar index 990f825..93f2a89 100644 --- a/src/surrealql.grammar +++ b/src/surrealql.grammar @@ -19,18 +19,86 @@ or @left } -@skip { whitespace | LineComment } +@skip { whitespace | LineComment | BlockComment } + +@external tokens tokens from "./tokens" { + As, + Since, + Limit, + Collate, + Numeric, + Asc, + Desc, + With, + Order, + Omit, + Where, + From, + Split, + Group, + Start, + Timeout, + Parallel, + Explain, + In, + Then, + Else, + End, + identifier +} +@external specialize {identifier} specializeIdent 
from "./tokens" { + Return, + Transaction, + Begin, + Break, + Cancel, + Commit, + Continue, + Use, + Ns, + Db, + Throw, + Sleep, + Show, + For, + Let, + Info, + Root, + Namespace, + Database, + Scope, + Value, + At, + By, + Noindex, + Index, + Only, + Full, + If, + True, + False, + None, + Null, + Comparison, + And, + Or, + Select +} +@external extend {identifier} extendIdent from "./tokens" { + Changes, + Table +} kw { @specialize[@name={term}] } ckw { @extend[@name={term}] } -Table { identifier } +TableId { identifier } RecordID { - Table ":" + TableId ":" (identifier | RIDStart RIDContent RIDEnd | RIDDelim RIDDelimContent RIDDelim | Array | Object) } RecordRange { - Table ":" + TableId ":" ( number? (".." | "..=") number? | Array? (".." | "..=") Array? | @@ -38,7 +106,7 @@ RecordRange { ) } GeneratedRecordID { - Table ":" FunctionCall + TableId ":" FunctionCall } Variable { "$" identifier @@ -57,11 +125,17 @@ StringPrefix { } Integer { int } -Decimal { int !int ("." int)? "dec" } -Float { int !int (("." int) "f"? | "f" ) } +Decimal { int !int ("." int)? kw<"dec"> } +Float { int !int (("." int) ckw<"f">? | ckw<"f"> ) } Duration { int DurationUnit } DurationUnit { - "d" | "h" | "m" | "s" | "ms" | "us" | "ns" + ckw<"d"> | + ckw<"h"> | + ckw<"m"> | + ckw<"s"> | + ckw<"ms"> | + ckw<"us"> | + ckw<"ns"> } number { Decimal | Float | Integer } @@ -75,17 +149,17 @@ Property { Object { "{" (Property ":" expression (Comma Property ":" expression)*)? "}" } -Namespace { +NamespaceId { identifier } FunctionName { identifier } FunctionCall { - (Namespace "::" )* FunctionName "(" (expression (Comma expression)*)? ")" + (NamespaceId "::" )* FunctionName "(" (expression (Comma expression)*)? ")" } Constant { - (Namespace "::")? + (NamespaceId "::")+ identifier } Cast { @@ -100,14 +174,17 @@ Column { identifier } Field { - expression ("as" identifier)? | + expression (As identifier)? 
| Star } +ReferencedFulltextOperator { + "@" identifier "@" +} BinaryExpression { expression !and And expression | expression !or Or expression | expression !comparison ( - Comparison | "@" identifier "@" + Comparison | ReferencedFulltextOperator ) expression | expression !exp Raise expression | expression !times (DivideOrMultiply) expression | @@ -118,114 +195,121 @@ BinaryExpression { } ReturnStatement { - kw<"return"> expression + Return expression } maybeTransaction { - kw<"transaction">? + Transaction? } BeginStatement { - kw<"begin"> maybeTransaction + Begin maybeTransaction } BreakStatement { - kw<"break"> + Break } CancelStatement { - kw<"cancel"> maybeTransaction + Cancel maybeTransaction } CommitStatement { - kw<"commit"> maybeTransaction + Commit maybeTransaction } ContinueStatement { - kw<"continue"> + Continue } UseStatement { - kw<"use"> - (kw<"ns"> identifier)? - (kw<"db"> identifier)? + Use + (Ns identifier)? + (Db identifier)? } ThrowStatement { - kw<"throw"> expression + Throw expression } SleepStatement { - kw<"sleep"> expression + Sleep expression } ShowStatement { - kw<"show"> ckw<"changes"> kw<"for"> ckw<"table"> + Show Changes For Table expression - ("since" expression)? - ("limit" expression)? + (Since expression)? + (Limit expression)? } LetStatement { - kw<"let"> Variable "=" (expression | statement) + Let Variable "=" (expression | statement) } InfoStatement { - kw<"info"> kw<"for"> + Info For ( - kw<"root"> | - kw<"ns"> | kw<"namespace"> | - kw<"db"> | kw<"database"> | - kw<"scope"> expression | - ckw<"table"> expression + Root | + Ns | Namespace | + Db | Database | + Scope expression | + Table expression ) } orderBy { - expression ("collate" | "numeric")? ("asc" | "desc")? + expression (Collate | Numeric)? (Asc | Desc)? } maybeValue { - kw<"value">? + Value? } maybeAt { - kw<"at">? + At? } maybeBy { - kw<"by">? + By? } selectWith { - ("with" (kw<"noindex"> | kw<"index"> identifier (Comma identifier)))? 
+ (With (Noindex | Index identifier (Comma identifier)))? } maybeOnly { - kw<"only">? + Only? } selectOrder { - ("order" maybeBy orderBy (Comma orderBy)*)? + (Order maybeBy orderBy (Comma orderBy)*)? +} +selectSource { + expression | + RecordRange | + GeneratedRecordID } selectFrom { - ("from" maybeOnly expression (Comma expression)*)? + (From maybeOnly selectSource (Comma selectSource)*)? } -Where { - "where" expression +WhereQueryPart { + Where expression } SelectStatement { - kw<"select"> + Select maybeValue (Field (Comma Field)*) - ("omit" Field (Comma Field)*)? + (Omit Field (Comma Field)*)? selectFrom selectWith - Where? - ("split" maybeAt expression)? - ("group" maybeBy expression (Comma expression)*)? + WhereQueryPart? + (Split maybeAt expression)? + (Group maybeBy expression (Comma expression)*)? selectOrder - ("limit" maybeBy expression)? - ("start" maybeAt expression)? - ("timeout" expression)? - "parallel"? - ("explain" kw<"full">?)? + (Limit maybeBy expression)? + (Start maybeAt expression)? + (Timeout expression)? + Parallel? + (Explain Full?)? } ForStatement { - kw<"for"> expression "in" expression "{" (statement Semi)* statement "}" + For expression In expression "{" (statement Semi)* statement "}" } -// IfStatement { -// kw<"if"> expression "then"? expression -// ("else" "if" expression "then"? expression)* -// ("else" expression)? -// } +IfStatement { + If expression Then expression + (Else If expression Then expression)* + (Else expression)? 
+ End +} -Bool { kw<"true"> | kw<"false"> } -None { kw<"none"> } -Null { kw<"null"> } +Bool { + True | + False +} expression { String | @@ -243,7 +327,8 @@ expression { BinaryExpression | Bool | None | - Null + Null | + Constant } ParenthesizedExpression { "(" (expression | statement) ")" @@ -263,8 +348,8 @@ statement[@isGroup=Statement] { LetStatement | InfoStatement | SelectStatement | - ForStatement // | - // IfStatement + ForStatement | + IfStatement } @local tokens { @@ -281,8 +366,6 @@ statement[@isGroup=Statement] { int { @digit+ } - identifier { $[a-zA-Z] $[a-zA-Z0-9_]* } - RIDDelim { "`" } RIDStart { "⟨" } RIDEnd { "⟩" } @@ -307,31 +390,6 @@ statement[@isGroup=Statement] { Comma { "," } Semi { ";" } - Comparison { - "??" | "?:" | - "=" | "IS" | - "!=" | "IS NOT"| - "==" | - "?=" | "*=" | - "~" | "!~" | "?~" | "*~" | - "IN" | "NOT IN" | - "CONTAINS" | "∋" | - "CONTAINSNOT" | "∌" | - "CONTAINSALL" | "⊇" | - "CONTAINSANY" | "⊃" | - "CONTAINSNONE" | "⊅" | - "INSIDE" | "∈" | - "NOTINSIDE" | "NOT IN" | "∉" | - "ALLINSIDE" | "⊆" | - "ANYINSIDE" | "⊂" | - "NONEINSIDE" | "⊄" | - "OUTSIDE" | - "INTERSECTS" | - "<" | ">" | "<=" | ">=" | - "@@" - } - And { "&&" | "and" } - Or { "||" | "or" } "/*" "(" ")" "[" "]" "{" "}" diff --git a/src/surrealql.ts b/src/surrealql.ts index 31af96d..e741c7b 100644 --- a/src/surrealql.ts +++ b/src/surrealql.ts @@ -17,12 +17,13 @@ export let parser = baseParser.configure({ Statement: continuedIndent() }), styleTags({ - "StringPrefix as asc desc": t.keyword, + "StringPrefix as asc desc collate numeric": t.keyword, + "select from where group by having order limit return transaction begin break cancel commit continue use db ns sleep show changes for table since info namespace database scope table value at with noindex index only omit split start timeout parallel explain full in": t.keyword, Escape: t.escape, Bool: t.bool, "DivideOrMultiply AddOrSubtract": t.arithmeticOperator, "let ": t.definitionKeyword, - Namespace: t.namespace, + 
NamespaceId: t.namespace, FunctionName: t.function(t.variableName), Variable: t.variableName, Star: t.atom, @@ -32,6 +33,7 @@ export let parser = baseParser.configure({ LineComment: t.lineComment, BlockComment: t.blockComment, String: t.string, + Constant: t.constant(t.variableName), "return break ForStatement/for if else then": t.controlKeyword, "DurationUnit": t.unit, "None Null": t.null, @@ -44,7 +46,8 @@ export let parser = baseParser.configure({ "Semi Comma": t.separator, "( )": t.paren, "{ }": t.brace, - "[ ]": t.squareBracket + "[ ]": t.squareBracket, + "< >": t.angleBracket, }) ] }); diff --git a/src/tokens.ts b/src/tokens.ts new file mode 100644 index 0000000..2cb5379 --- /dev/null +++ b/src/tokens.ts @@ -0,0 +1,262 @@ +import { ExternalTokenizer, InputStream } from "@lezer/lr" +import { + // tokens + As, + Since, + Limit, + Collate, + Numeric, + Asc, + Desc, + With, + Order, + Omit, + Where, + From, + Split, + Group, + Start, + Timeout, + Parallel, + Explain, + In, + Then, + Else, + End, + + // specialize + Return, + Transaction, + Begin, + Break, + Cancel, + Commit, + Continue, + Use, + Ns, + Db, + Throw, + Sleep, + Show, + For, + Let, + Info, + Root, + Namespace, + Database, + Scope, + Value, + At, + By, + Noindex, + Index, + Only, + Full, + If, + True, + False, + None, + Null, + Comparison, + And, + Or, + Select, + + // extend + Changes, + Table, + identifier +} from "./surrealql.grammar.terms" + +const enum Ch { + Newline = 10, + Space = 32, + DoubleQuote = 34, + Hash = 35, + Dollar = 36, + SingleQuote = 39, + ParenL = 40, ParenR = 41, + Star = 42, + Plus = 43, + Comma = 44, + Dash = 45, + Dot = 46, + Slash = 47, + Colon = 58, + Semi = 59, + Question = 63, + At = 64, + BracketL = 91, BracketR = 93, + Backslash = 92, + Underscore = 95, + Backtick = 96, + BraceL = 123, BraceR = 125, + + A = 65, a = 97, + B = 66, b = 98, + E = 69, e = 101, + F = 70, f = 102, + N = 78, n = 110, + Q = 81, q = 113, + X = 88, x = 120, + Z = 90, z = 122, + + _0 = 48, _1 = 49, 
_9 = 57, +} + +const kwmap = new Map([ + [[ + "??", "?:", + "=", "is", + "!=", "is not", + "==", + "?=", "*=", + "~", "!~", "?~", "*~", + "in", "not in", + "contains", "∋", + "containsnot", "∌", + "containsall", "⊇", + "containsany", "⊃", + "containsnone", "⊅", + "inside", "∈", + "notinside", "not in", "∉", + "allinside", "⊆", + "anyinside", "⊂", + "noneinside", "⊄", + "outside", + "intersects", + "<", ">", "<=", ">=", + "@@", "@" + ], Comparison], + [["&&", "and"], And], + [["||", "or"], Or], + + [["as"], As], + [["since"], Since], + [["limit"], Limit], + [["collate"], Collate], + [["numeric"], Numeric], + [["asc"], Asc], + [["desc"], Desc], + [["with"], With], + [["order"], Order], + [["omit"], Omit], + [["where"], Where], + [["from"], From], + [["split"], Split], + [["group"], Group], + [["start"], Start], + [["timeout"], Timeout], + [["parallel"], Parallel], + [["explain"], Explain], + [["in"], In], + [["then"], Then], + [["else"], Else], + [["end"], End], +]); +const specmap = new Map([ + ["return", Return], + ["transaction", Transaction], + ["begin", Begin], + ["break", Break], + ["cancel", Cancel], + ["commit", Commit], + ["continue", Continue], + ["use", Use], + ["ns", Ns], + ["db", Db], + ["throw", Throw], + ["sleep", Sleep], + ["show", Show], + ["for", For], + ["let", Let], + ["info", Info], + ["for", For], + ["root", Root], + ["namespace", Namespace], + ["database", Database], + ["scope", Scope], + ["value", Value], + ["at", At], + ["by", By], + ["noindex", Noindex], + ["index", Index], + ["only", Only], + ["full", Full], + ["if", If], + ["true", True], + ["false", False], + ["none", None], + ["null", Null], + ["comparison", Comparison], + ["and", And], + ["or", Or], + ["select", Select] +]); +const cspecmap = new Map([ + ["changes", Changes], + ["table", Table], +]); + +function isAlpha(ch: number) { + return ch >= Ch.A && ch <= Ch.Z || ch >= Ch.a && ch <= Ch.z +} +function isAlphaNum(ch: number) { + return ch >= Ch.A && ch <= Ch.Z || ch >= Ch.a && ch 
<= Ch.z || ch >= Ch._0 && ch <= Ch._9
+}
+
+/** Consumes identifier characters (letters, digits, "_"); returns them appended to `result`, or undefined when none was passed. */
+function readWord(input: InputStream, result?: string) {
+  for (;;) {
+    if (input.next != Ch.Underscore && !isAlphaNum(input.next)) break
+    if (result != null) result += String.fromCharCode(input.next)
+    input.advance()
+  }
+  return result
+}
+/** Advances the stream past any run of spaces and newlines. */
+function skipSpaces(input: InputStream) {
+  for (;;) {
+    if (input.next != Ch.Space && input.next != Ch.Newline) break
+    input.advance()
+  }
+}
+
+/** Returns the token whose `kwmap` spelling list contains `word` (expected lower-cased), or -1 when there is none. */
+function keywordToken(word: string) {
+  for (const [spellings, token] of kwmap) {
+    if (spellings.includes(word)) return token
+  }
+  return -1
+}
+
+/** Word tokenizer (registered with `contextual: true`): emits a `kwmap` token or `identifier`; keywords match case-insensitively. */
+export const tokens = new ExternalTokenizer((input, stack) => {
+  const first = input.next
+  if (!isAlpha(first)) return
+  input.advance()
+  const word = (readWord(input, String.fromCharCode(first)) ?? "").toLowerCase()
+  const wordEnd = input.pos
+  if (word == "is" || word == "not") {
+    // Possible two-word operator ("is not", "not in"). The second word must be
+    // collected into a string: readWord returns undefined without a `result`.
+    skipSpaces(input)
+    const pair = keywordToken(word + " " + (readWord(input, "") ?? "").toLowerCase())
+    if (pair >= 0) {
+      input.acceptToken(pair)
+      return
+    }
+  }
+  // The end offset is <= 0: it rewinds the token end to the first word if the look-ahead above consumed more input.
+  const single = keywordToken(word)
+  input.acceptToken(single >= 0 ? single : identifier, wordEnd - input.pos)
+}, {contextual: true})
+
+/** External specializer for `identifier`: returns the `specmap` term for `text` (case-insensitive), or -1 for no specialization. */
+export const specializeIdent = (text: string, stack: unknown): number => {
+  return specmap.get(text.toLowerCase()) ?? -1
+}
+/** External extender for `identifier`: returns the `cspecmap` term for `text` (case-insensitive), or -1 for no extension. */
+export const extendIdent = (text: string, stack: unknown): number => {
+  return cspecmap.get(text.toLowerCase()) ?? -1
+}
\ No newline at end of file
diff --git a/test.js b/test.js
index c296640..33ec2dc 100644
--- a/test.js
+++ b/test.js
@@ -1,2 +1,4 @@
 import {baseParser} from "./dist/index.js"
-console.log(baseParser.parse('begin transaction').toString())
\ No newline at end of file
+// console.log(baseParser.parse('select field from type::table($var) where $var > 1 and field2 @@ field1 timeout 10s').toString())
+// console.log(baseParser.parse('select * from documents where contents @@ "test"').toString())
+console.log(baseParser.parse('select * from documents').toString())
\ No newline at end of file