Update jslex with newer syntax

This commit is contained in:
James Bligh 2025-06-16 19:51:23 +01:00
parent 4634bf064f
commit cb3afe117c
2 changed files with 194 additions and 13 deletions

View file

@ -1,6 +1,8 @@
"""JsLex: a lexer for JavaScript"""
# Originally from https://bitbucket.org/ned/jslex
# Originally contributed by Ned Batchelder, the author of jslex.
# See https://github.com/django/django/commit/64e19ffb4ee32767861d25c874f0d2dfc75618b7
# jslex is also published at https://github.com/nedbat/jslex
import re
@ -107,11 +109,11 @@ class JsLexer(Lexer):
"keyword",
literals(
"""
break case catch class const continue debugger
async await break case catch class const continue debugger
default delete do else enum export extends
finally for function if import in instanceof
new return super switch this throw try typeof
var void while with
let new return static super switch this throw try typeof
var void while with yield
""",
suffix=r"\b",
),
@ -126,21 +128,27 @@ class JsLexer(Lexer):
""",
next="div",
),
Tok("hnum", r"0[xX][0-9a-fA-F]+", next="div"),
Tok("hbigint", r"0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*n", next="div"),
Tok("hnum", r"0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*", next="div"),
Tok("bbigint", r"0[bB][01]+(_[01]+)*n", next="div"),
Tok("bnum", r"0[bB][01]+(_[01]+)*", next="div"),
Tok("obigint", r"0[oO][0-7]+(_[0-7]+)*n", next="div"),
Tok("onum", r"0[oO][0-7]+(_[0-7]+)*", next="div"),
Tok("dbigint", r"(0|[1-9][0-9]*(_[0-9]+)*)n", next="div"),
Tok("onum", r"0[0-7]+"),
Tok(
"dnum",
r"""
( (0|[1-9][0-9]*) # DecimalIntegerLiteral
( (0|[1-9][0-9]*(_[0-9]+)*) # DecimalIntegerLiteral
\. # dot
[0-9]* # DecimalDigits-opt
[0-9]*(_[0-9]+)* # DecimalDigits-opt
([eE][-+]?[0-9]+)? # ExponentPart-opt
|
\. # dot
[0-9]+ # DecimalDigits
[0-9]+(_[0-9]+)* # DecimalDigits
([eE][-+]?[0-9]+)? # ExponentPart-opt
|
(0|[1-9][0-9]*) # DecimalIntegerLiteral
(0|[1-9][0-9]*(_[0-9]+)*) # DecimalIntegerLiteral
([eE][-+]?[0-9]+)? # ExponentPart-opt
)
""",
@ -150,9 +158,9 @@ class JsLexer(Lexer):
"punct",
literals(
"""
>>>= === !== >>> <<= >>= <= >= == != << >> &&
|| += -= *= %= &= |= ^=
"""
>>>= === !== >>> <<= >>= <= >= == != << >> &&= && => ?. ??= ??
**= ** ||= || += -= *= %= &= |= ^=
"""
),
next="reg",
),
@ -160,7 +168,7 @@ class JsLexer(Lexer):
Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next="reg"),
Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next="div"),
Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next="div"),
Tok("string", r"`([^'\\]|(\\(.|\n)))*?`", next="div"),
Tok("string", r"`([^`\\]|(\\(.|\n))|\$\{[^}]*\})*?`", next="div"),
]
both_after = [

View file

@ -272,6 +272,179 @@ class JsTokensTest(SimpleTestCase):
"punct ;",
],
),
# Template literals
("`hello world`", ["string `hello world`"]),
("`hello ${name}!`", ["string `hello ${name}!`"]),
("`multiline\\nstring`", ["string `multiline\\nstring`"]),
# Arrow functions
("() => x", ["punct (", "punct )", "punct =>", "id x"]),
("a => a * 2", ["id a", "punct =>", "id a", "punct *", "dnum 2"]),
# Let keyword
("let x = 5", ["keyword let", "id x", "punct =", "dnum 5"]),
(
"let let_var = true",
["keyword let", "id let_var", "punct =", "reserved true"],
),
# Binary literals
("0b1010 0B1111 0b0", ["bnum 0b1010", "bnum 0B1111", "bnum 0b0"]),
("0b1010abc", ["bnum 0b1010", "id abc"]),
# New octal literals
("0o755 0O644 0o17", ["onum 0o755", "onum 0O644", "onum 0o17"]),
("0o755abc", ["onum 0o755", "id abc"]),
# Async/await keywords
(
"async function test() {}",
[
"keyword async",
"keyword function",
"id test",
"punct (",
"punct )",
"punct {",
"punct }",
],
),
("await promise", ["keyword await", "id promise"]),
(
"async () => await fetch()",
[
"keyword async",
"punct (",
"punct )",
"punct =>",
"keyword await",
"id fetch",
"punct (",
"punct )",
],
),
# Exponentiation operator
("2 ** 3", ["dnum 2", "punct **", "dnum 3"]),
("x **= 2", ["id x", "punct **=", "dnum 2"]),
("2**3**4", ["dnum 2", "punct **", "dnum 3", "punct **", "dnum 4"]),
# Nullish coalescing
("x ?? y", ["id x", "punct ??", "id y"]),
("x ??= y", ["id x", "punct ??=", "id y"]),
("null ?? 'default'", ["reserved null", "punct ??", "string 'default'"]),
# Optional chaining
("obj?.prop", ["id obj", "punct ?.", "id prop"]),
(
"obj?.method?.()",
["id obj", "punct ?.", "id method", "punct ?.", "punct (", "punct )"],
),
("arr?.[0]", ["id arr", "punct ?.", "punct [", "dnum 0", "punct ]"]),
# Logical assignment
("x &&= y", ["id x", "punct &&=", "id y"]),
("x ||= y", ["id x", "punct ||=", "id y"]),
(
"flag &&= isValid()",
["id flag", "punct &&=", "id isValid", "punct (", "punct )"],
),
# Numeric separators
("1_000_000", ["dnum 1_000_000"]),
("3.14_159", ["dnum 3.14_159"]),
("0xFF_EC_DE_5E", ["hnum 0xFF_EC_DE_5E"]),
("0b1010_0001", ["bnum 0b1010_0001"]),
("0o755_644", ["onum 0o755_644"]),
# BigInt literals
("123n", ["dbigint 123n"]),
("0xFFn", ["hbigint 0xFFn"]),
("0b1010n", ["bbigint 0b1010n"]),
("0o755n", ["obigint 0o755n"]),
("1_000_000n", ["dbigint 1_000_000n"]),
("0xFF_EC_DE_5En", ["hbigint 0xFF_EC_DE_5En"]),
# Yield keyword
("yield x", ["keyword yield", "id x"]),
(
"yield* generator()",
["keyword yield", "punct *", "id generator", "punct (", "punct )"],
),
(
"function* gen() { yield 1; }",
[
"keyword function",
"punct *",
"id gen",
"punct (",
"punct )",
"punct {",
"keyword yield",
"dnum 1",
"punct ;",
"punct }",
],
),
# Static keyword
(
"static method() {}",
["keyword static", "id method", "punct (", "punct )", "punct {", "punct }"],
),
("static prop = 5", ["keyword static", "id prop", "punct =", "dnum 5"]),
# Complex combinations
(
"const fn = async (x) => await x?.result ?? 'default'",
[
"keyword const",
"id fn",
"punct =",
"keyword async",
"punct (",
"id x",
"punct )",
"punct =>",
"keyword await",
"id x",
"punct ?.",
"id result",
"punct ??",
"string 'default'",
],
),
(
"let big = 1_000n ** 2n",
[
"keyword let",
"id big",
"punct =",
"dbigint 1_000n",
"punct **",
"dbigint 2n",
],
),
(
"obj.prop ||= `default ${value}`",
["id obj", "punct .", "id prop", "punct ||=", "string `default ${value}`"],
),
# Edge cases
(
"0b1010 + 0o755 + 0xFF",
["bnum 0b1010", "punct +", "onum 0o755", "punct +", "hnum 0xFF"],
),
("x?.y?.z", ["id x", "punct ?.", "id y", "punct ?.", "id z"]),
("a ?? b ?? c", ["id a", "punct ??", "id b", "punct ??", "id c"]),
("**=", ["punct **="]),
("?.??", ["punct ?.", "punct ??"]),
# Regex with new flags (should still work with existing pattern)
("/test/gimsuy", ["regex /test/gimsuy"]),
("/pattern/u", ["regex /pattern/u"]),
("/sticky/y", ["regex /sticky/y"]),
("/dotall/s", ["regex /dotall/s"]),
# Mixed old and new features
(
"var old = 5; let x = 0b101;",
[
"keyword var",
"id old",
"punct =",
"dnum 5",
"punct ;",
"keyword let",
"id x",
"punct =",
"bnum 0b101",
"punct ;",
],
),
]