Rollup merge of #149057 - lnicola:sync-from-ra, r=lnicola

`rust-analyzer` subtree update

Subtree update of `rust-analyzer` to afcfe141ae.

Created using https://github.com/rust-lang/josh-sync.

r? `@ghost`
Commit dba9069adc by Matthias Krüger, 2025-11-18 16:52:12 +01:00 (committed by GitHub)
73 changed files with 5997 additions and 411 deletions

Cargo.lock (generated)
View file

@ -17,12 +17,27 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstyle"
version = "1.0.11"
@ -107,6 +122,21 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -134,6 +164,12 @@ version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e"
[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "byteorder"
version = "1.5.0"
@ -220,6 +256,12 @@ dependencies = [
"thiserror 2.0.16",
]
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.2.38"
@ -258,6 +300,33 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.5.48"
@ -313,6 +382,39 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "criterion"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"itertools 0.13.0",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338"
dependencies = [
"cast",
"itertools 0.13.0",
]
[[package]]
name = "critical-section"
version = "1.2.0"
@ -362,6 +464,12 @@ version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crunchy"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "ctrlc"
version = "3.5.0"
@ -526,6 +634,16 @@ dependencies = [
"typeid",
]
[[package]]
name = "errno"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys 0.61.0",
]
[[package]]
name = "expect-test"
version = "1.5.1"
@ -536,6 +654,12 @@ dependencies = [
"once_cell",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "find-msvc-tools"
version = "0.1.2"
@ -558,6 +682,12 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.5"
@ -599,12 +729,35 @@ dependencies = [
"wasi",
]
[[package]]
name = "getrandom"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasip2",
]
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "half"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"cfg-if",
"crunchy",
"zerocopy",
]
[[package]]
name = "hash32"
version = "0.2.1"
@ -680,7 +833,7 @@ dependencies = [
"hir-ty",
"indexmap",
"intern",
"itertools",
"itertools 0.14.0",
"ra-ap-rustc_type_ir",
"rustc-hash 2.1.1",
"smallvec",
@ -713,7 +866,7 @@ dependencies = [
"hir-expand",
"indexmap",
"intern",
"itertools",
"itertools 0.14.0",
"la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"mbe",
"query-group-macro",
@ -730,7 +883,7 @@ dependencies = [
"syntax-bridge",
"test-fixture",
"test-utils",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"thin-vec",
"tracing",
"triomphe",
@ -747,7 +900,7 @@ dependencies = [
"either",
"expect-test",
"intern",
"itertools",
"itertools 0.14.0",
"mbe",
"parser",
"query-group-macro",
@ -779,7 +932,7 @@ dependencies = [
"hir-expand",
"indexmap",
"intern",
"itertools",
"itertools 0.14.0",
"la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"macros",
"oorandom",
@ -921,7 +1074,7 @@ dependencies = [
"ide-db",
"ide-diagnostics",
"ide-ssr",
"itertools",
"itertools 0.14.0",
"macros",
"nohash-hasher",
"oorandom",
@ -950,7 +1103,7 @@ dependencies = [
"expect-test",
"hir",
"ide-db",
"itertools",
"itertools 0.14.0",
"smallvec",
"stdx",
"syntax",
@ -968,7 +1121,7 @@ dependencies = [
"expect-test",
"hir",
"ide-db",
"itertools",
"itertools 0.14.0",
"macros",
"smallvec",
"stdx",
@ -992,7 +1145,7 @@ dependencies = [
"fst",
"hir",
"indexmap",
"itertools",
"itertools 0.14.0",
"line-index 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"macros",
"memchr",
@ -1025,7 +1178,7 @@ dependencies = [
"expect-test",
"hir",
"ide-db",
"itertools",
"itertools 0.14.0",
"paths",
"serde_json",
"stdx",
@ -1043,7 +1196,7 @@ dependencies = [
"expect-test",
"hir",
"ide-db",
"itertools",
"itertools 0.14.0",
"parser",
"syntax",
"test-fixture",
@ -1132,6 +1285,15 @@ dependencies = [
"rustversion",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.14.0"
@ -1153,6 +1315,16 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a037eddb7d28de1d0fc42411f501b53b75838d313908078d6698d064f3029b24"
[[package]]
name = "js-sys"
version = "0.3.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "kqueue"
version = "1.1.1"
@ -1231,7 +1403,7 @@ version = "0.1.2"
dependencies = [
"nohash-hasher",
"oorandom",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -1241,9 +1413,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e27e0ed5a392a7f5ba0b3808a2afccff16c64933312c84b57618b49d1209bd2"
dependencies = [
"nohash-hasher",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
[[package]]
name = "litemap"
version = "0.8.0"
@ -1259,7 +1437,7 @@ dependencies = [
"hir-expand",
"ide-db",
"intern",
"itertools",
"itertools 0.14.0",
"proc-macro-api",
"project-model",
"span",
@ -1639,6 +1817,34 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "plotters"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
[[package]]
name = "plotters-svg"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
dependencies = [
"plotters-backend",
]
[[package]]
name = "portable-atomic"
version = "1.11.1"
@ -1671,6 +1877,15 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro-api"
version = "0.0.0"
@ -1767,7 +1982,7 @@ dependencies = [
"cfg",
"expect-test",
"intern",
"itertools",
"itertools 0.14.0",
"la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"paths",
"rustc-hash 2.1.1",
@ -1783,6 +1998,25 @@ dependencies = [
"triomphe",
]
[[package]]
name = "proptest"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40"
dependencies = [
"bit-set",
"bit-vec",
"bitflags 2.9.4",
"num-traits",
"rand",
"rand_chacha",
"rand_xorshift",
"regex-syntax",
"rusty-fork",
"tempfile",
"unarray",
]
[[package]]
name = "protobuf"
version = "3.7.1"
@ -1835,6 +2069,12 @@ dependencies = [
"syn",
]
[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quote"
version = "1.0.40"
@ -1844,6 +2084,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "ra-ap-rustc_abi"
version = "0.137.0"
@ -1971,6 +2217,44 @@ dependencies = [
"synstructure",
]
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
"getrandom 0.3.4",
]
[[package]]
name = "rand_xorshift"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
dependencies = [
"rand_core",
]
[[package]]
name = "rayon"
version = "1.11.0"
@ -2006,11 +2290,40 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac"
dependencies = [
"getrandom",
"getrandom 0.2.16",
"libredox",
"thiserror 2.0.16",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rowan"
version = "0.15.15"
@ -2021,7 +2334,7 @@ dependencies = [
"hashbrown 0.14.5",
"memoffset",
"rustc-hash 1.1.0",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -2046,7 +2359,7 @@ dependencies = [
"ide-ssr",
"indexmap",
"intern",
"itertools",
"itertools 0.14.0",
"load-cargo",
"lsp-server 0.7.9 (registry+https://github.com/rust-lang/crates.io-index)",
"lsp-types",
@ -2147,12 +2460,37 @@ dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
"bitflags 2.9.4",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.61.0",
]
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "rusty-fork"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2"
dependencies = [
"fnv",
"quick-error",
"tempfile",
"wait-timeout",
]
[[package]]
name = "ryu"
version = "1.0.20"
@ -2328,6 +2666,15 @@ dependencies = [
"serde",
]
[[package]]
name = "serde_test"
version = "1.0.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f901ee573cab6b3060453d2d5f0bae4e6d628c23c0a962ff9b5f1d7c8d4f1ed"
dependencies = [
"serde",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
@ -2359,6 +2706,20 @@ dependencies = [
"serde",
]
[[package]]
name = "smol_str"
version = "0.3.4"
dependencies = [
"arbitrary",
"borsh",
"criterion",
"proptest",
"rand",
"serde",
"serde_core",
"serde_json",
]
[[package]]
name = "span"
version = "0.0.0"
@ -2369,7 +2730,7 @@ dependencies = [
"salsa",
"stdx",
"syntax",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"vfs",
]
@ -2388,6 +2749,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "stdx"
version = "0.0.0"
@ -2395,7 +2762,7 @@ dependencies = [
"backtrace",
"crossbeam-channel",
"crossbeam-utils",
"itertools",
"itertools 0.14.0",
"jod-thread",
"libc",
"miow",
@ -2431,14 +2798,14 @@ version = "0.0.0"
dependencies = [
"either",
"expect-test",
"itertools",
"itertools 0.14.0",
"parser",
"rayon",
"rowan",
"rustc-hash 2.1.1",
"rustc-literal-escaper 0.0.4",
"rustc_apfloat",
"smol_str",
"smol_str 0.3.2",
"stdx",
"test-utils",
"tracing",
@ -2465,6 +2832,19 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83176759e9416cf81ee66cb6508dbfe9c96f20b8b56265a39917551c23c70964"
[[package]]
name = "tempfile"
version = "3.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
dependencies = [
"fastrand",
"getrandom 0.3.4",
"once_cell",
"rustix",
"windows-sys 0.61.0",
]
[[package]]
name = "tenthash"
version = "1.1.0"
@ -2497,7 +2877,16 @@ dependencies = [
"profile",
"rustc-hash 2.1.1",
"stdx",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "text-size"
version = "1.1.1"
dependencies = [
"serde",
"serde_test",
"static_assertions",
]
[[package]]
@ -2641,6 +3030,16 @@ dependencies = [
"zerovec",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "toml"
version = "0.8.23"
@ -2772,7 +3171,7 @@ dependencies = [
"intern",
"ra-ap-rustc_lexer",
"stdx",
"text-size",
"text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -2787,12 +3186,30 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c"
[[package]]
name = "unarray"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
[[package]]
name = "ungrammar"
version = "1.16.1"
[[package]]
name = "ungrammar"
version = "1.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e5df347f0bf3ec1d670aad6ca5c6a1859cd9ea61d2113125794654ccced68f"
[[package]]
name = "ungrammar2json"
version = "1.0.0"
dependencies = [
"ungrammar 1.16.1",
"write-json",
]
[[package]]
name = "unicase"
version = "2.8.1"
@ -2870,6 +3287,15 @@ dependencies = [
"walkdir",
]
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "walkdir"
version = "2.5.0"
@ -2886,6 +3312,70 @@ version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc"
dependencies = [
"bumpalo",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76"
dependencies = [
"unicode-ident",
]
[[package]]
name = "web-sys"
version = "0.3.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi-util"
version = "0.1.11"
@ -3168,6 +3658,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
[[package]]
name = "write-json"
version = "0.1.4"
@ -3218,12 +3714,12 @@ dependencies = [
"edition",
"either",
"flate2",
"itertools",
"itertools 0.14.0",
"proc-macro2",
"quote",
"stdx",
"time",
"ungrammar",
"ungrammar 1.16.1 (registry+https://github.com/rust-lang/crates.io-index)",
"write-json",
"xflags",
"xshell",
@ -3254,6 +3750,26 @@ dependencies = [
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.8.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "zerofrom"
version = "0.1.6"

View file

@ -1,5 +1,5 @@
[workspace]
members = ["xtask/", "lib/*", "crates/*"]
members = ["xtask/", "lib/*", "lib/ungrammar/ungrammar2json", "crates/*"]
exclude = ["crates/proc-macro-srv/proc-macro-test/imp"]
resolver = "2"
@ -42,7 +42,7 @@ debug = 2
# lsp-server = { path = "lib/lsp-server" }
# ungrammar = { path = "../ungrammar" }
# ungrammar = { path = "lin/ungrammar" }
# salsa = { path = "../salsa" }
# salsa-macros = { path = "../salsa/components/salsa-macros" }

View file

@ -674,10 +674,13 @@ impl<'db> InferenceTable<'db> {
let args = [ty, arg_ty];
let trait_ref = TraitRef::new(self.interner(), fn_trait.into(), args);
let proj_args = self
.infer_ctxt
.fill_rest_fresh_args(output_assoc_type.into(), args.into_iter().map(Into::into));
let projection = Ty::new_alias(
self.interner(),
rustc_type_ir::AliasTyKind::Projection,
AliasTy::new(self.interner(), output_assoc_type.into(), args),
AliasTy::new(self.interner(), output_assoc_type.into(), proj_args),
);
let pred = Predicate::upcast_from(trait_ref, self.interner());
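
For reference: this fix targets projections whose associated type takes more generic parameters than the owning trait, which is why the trait args alone no longer suffice and `fill_rest_fresh_args` tops them up with fresh inference variables. A minimal stable-Rust illustration of the shape involved (`AsyncFnMutLike` is a hypothetical stand-in, not rustc internals):

// The output associated type carries an extra lifetime parameter, so a
// projection `<F as AsyncFnMutLike<Args>>::CallRefFuture<'a>` needs one more
// generic argument than the trait ref `F: AsyncFnMutLike<Args>` supplies.
trait AsyncFnMutLike<Args> {
    type CallRefFuture<'a>
    where
        Self: 'a;
}

fn main() {}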

View file

@ -524,3 +524,31 @@ fn g(it: *const (dyn Trait)) {
"#,
);
}
#[test]
fn regression_20951() {
check_infer(
r#"
//- minicore: async_fn
trait DoesSomething {
fn do_something(&self) -> impl Future<Output = usize>;
}
impl<F> DoesSomething for F
where
F: AsyncFn() -> usize,
{
fn do_something(&self) -> impl Future<Output = usize> {
self()
}
}
"#,
expect![[r#"
43..47 'self': &'? Self
168..172 'self': &'? F
205..227 '{ ... }': <F as AsyncFnMut<()>>::CallRefFuture<'<erased>>
215..219 'self': &'? F
215..221 'self()': <F as AsyncFnMut<()>>::CallRefFuture<'<erased>>
"#]],
);
}

View file

@ -35,6 +35,9 @@ pub mod term_search;
mod display;
#[doc(hidden)]
pub use hir_def::ModuleId;
use std::{
fmt,
mem::discriminant,
@ -48,8 +51,8 @@ use hir_def::{
AdtId, AssocItemId, AssocItemLoc, AttrDefId, CallableDefId, ConstId, ConstParamId,
CrateRootModuleId, DefWithBodyId, EnumId, EnumVariantId, ExternBlockId, ExternCrateId,
FunctionId, GenericDefId, GenericParamId, HasModule, ImplId, ItemContainerId, LifetimeParamId,
LocalFieldId, Lookup, MacroExpander, MacroId, ModuleId, StaticId, StructId, SyntheticSyntax,
TupleId, TypeAliasId, TypeOrConstParamId, TypeParamId, UnionId,
LocalFieldId, Lookup, MacroExpander, MacroId, StaticId, StructId, SyntheticSyntax, TupleId,
TypeAliasId, TypeOrConstParamId, TypeParamId, UnionId,
expr_store::{ExpressionStoreDiagnostics, ExpressionStoreSourceMap},
hir::{
BindingAnnotation, BindingId, Expr, ExprId, ExprOrPatId, LabelId, Pat,

View file

@ -18,7 +18,7 @@ use hir_ty::{
};
use intern::Symbol;
use rustc_hash::FxHashMap;
use syntax::{AstNode, AstPtr, SmolStr, SyntaxNode, SyntaxNodePtr, ToSmolStr, ast::HasName};
use syntax::{AstNode, AstPtr, SyntaxNode, SyntaxNodePtr, ToSmolStr, ast::HasName};
use crate::{HasCrate, Module, ModuleDef, Semantics};
@ -29,7 +29,7 @@ pub struct FileSymbol {
pub name: Symbol,
pub def: ModuleDef,
pub loc: DeclarationLocation,
pub container_name: Option<SmolStr>,
pub container_name: Option<Symbol>,
/// Whether this symbol is a doc alias for the original symbol.
pub is_alias: bool,
pub is_assoc: bool,
@ -65,23 +65,29 @@ pub struct SymbolCollector<'a> {
db: &'a dyn HirDatabase,
symbols: FxIndexSet<FileSymbol>,
work: Vec<SymbolCollectorWork>,
current_container_name: Option<SmolStr>,
current_container_name: Option<Symbol>,
collect_pub_only: bool,
}
/// Given a [`ModuleId`] and a [`HirDatabase`], use the DefMap for the module's crate to collect
/// all symbols that should be indexed for the given module.
impl<'a> SymbolCollector<'a> {
pub fn new(db: &'a dyn HirDatabase) -> Self {
pub fn new(db: &'a dyn HirDatabase, collect_pub_only: bool) -> Self {
SymbolCollector {
db,
symbols: Default::default(),
work: Default::default(),
current_container_name: None,
collect_pub_only,
}
}
pub fn new_module(db: &dyn HirDatabase, module: Module) -> Box<[FileSymbol]> {
let mut symbol_collector = SymbolCollector::new(db);
pub fn new_module(
db: &dyn HirDatabase,
module: Module,
collect_pub_only: bool,
) -> Box<[FileSymbol]> {
let mut symbol_collector = SymbolCollector::new(db, collect_pub_only);
symbol_collector.collect(module);
symbol_collector.finish()
}
@ -108,12 +114,16 @@ impl<'a> SymbolCollector<'a> {
tracing::info!(?work, "SymbolCollector::do_work");
self.db.unwind_if_revision_cancelled();
let parent_name = work.parent.map(|name| name.as_str().to_smolstr());
let parent_name = work.parent.map(|name| Symbol::intern(name.as_str()));
self.with_container_name(parent_name, |s| s.collect_from_module(work.module_id));
}
fn collect_from_module(&mut self, module_id: ModuleId) {
let push_decl = |this: &mut Self, def, name| {
let collect_pub_only = self.collect_pub_only;
let push_decl = |this: &mut Self, def: ModuleDefId, name, vis| {
if collect_pub_only && vis != Visibility::Public {
return;
}
match def {
ModuleDefId::ModuleId(id) => this.push_module(id, name),
ModuleDefId::FunctionId(id) => {
@ -125,7 +135,7 @@ impl<'a> SymbolCollector<'a> {
}
ModuleDefId::AdtId(AdtId::EnumId(id)) => {
this.push_decl(id, name, false, None);
let enum_name = this.db.enum_signature(id).name.as_str().to_smolstr();
let enum_name = Symbol::intern(this.db.enum_signature(id).name.as_str());
this.with_container_name(Some(enum_name), |this| {
let variants = id.enum_variants(this.db);
for (variant_id, variant_name, _) in &variants.variants {
@ -175,6 +185,9 @@ impl<'a> SymbolCollector<'a> {
};
let mut push_import = |this: &mut Self, i: ImportId, name: &Name, def: ModuleDefId, vis| {
if collect_pub_only && vis != Visibility::Public {
return;
}
let source = import_child_source_cache
.entry(i.use_)
.or_insert_with(|| i.use_.child_source(this.db));
@ -209,6 +222,9 @@ impl<'a> SymbolCollector<'a> {
let push_extern_crate =
|this: &mut Self, i: ExternCrateId, name: &Name, def: ModuleDefId, vis| {
if collect_pub_only && vis != Visibility::Public {
return;
}
let loc = i.lookup(this.db);
let source = loc.source(this.db);
let rename = source.value.rename().and_then(|rename| rename.name());
@ -258,7 +274,7 @@ impl<'a> SymbolCollector<'a> {
continue;
}
// self is a declaration
push_decl(self, def, name)
push_decl(self, def, name, vis)
}
for (name, Item { def, vis, import }) in scope.macros() {
@ -271,7 +287,7 @@ impl<'a> SymbolCollector<'a> {
continue;
}
// self is a declaration
push_decl(self, def.into(), name)
push_decl(self, ModuleDefId::MacroId(def), name, vis)
}
for (name, Item { def, vis, import }) in scope.values() {
@ -283,7 +299,7 @@ impl<'a> SymbolCollector<'a> {
continue;
}
// self is a declaration
push_decl(self, def, name)
push_decl(self, def, name, vis)
}
for const_id in scope.unnamed_consts() {
@ -304,6 +320,9 @@ impl<'a> SymbolCollector<'a> {
}
fn collect_from_body(&mut self, body_id: impl Into<DefWithBodyId>, name: Option<Name>) {
if self.collect_pub_only {
return;
}
let body_id = body_id.into();
let body = self.db.body(body_id);
@ -328,8 +347,13 @@ impl<'a> SymbolCollector<'a> {
)
.to_smolstr(),
);
self.with_container_name(impl_name, |s| {
self.with_container_name(impl_name.as_deref().map(Symbol::intern), |s| {
for &(ref name, assoc_item_id) in &impl_id.impl_items(self.db).items {
if s.collect_pub_only && s.db.assoc_visibility(assoc_item_id) != Visibility::Public
{
continue;
}
s.push_assoc_item(assoc_item_id, name, None)
}
})
@ -337,14 +361,14 @@ impl<'a> SymbolCollector<'a> {
fn collect_from_trait(&mut self, trait_id: TraitId, trait_do_not_complete: Complete) {
let trait_data = self.db.trait_signature(trait_id);
self.with_container_name(Some(trait_data.name.as_str().into()), |s| {
self.with_container_name(Some(Symbol::intern(trait_data.name.as_str())), |s| {
for &(ref name, assoc_item_id) in &trait_id.trait_items(self.db).items {
s.push_assoc_item(assoc_item_id, name, Some(trait_do_not_complete));
}
});
}
fn with_container_name(&mut self, container_name: Option<SmolStr>, f: impl FnOnce(&mut Self)) {
fn with_container_name(&mut self, container_name: Option<Symbol>, f: impl FnOnce(&mut Self)) {
if let Some(container_name) = container_name {
let prev = self.current_container_name.replace(container_name);
f(self);
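
The thread running through this file is one rule applied at every push site (declarations, macros, values, imports, extern crates, impl and trait items): when `collect_pub_only` is set, as it is for library indexes, skip anything that is not `Visibility::Public`. A self-contained sketch of that rule, with hypothetical stand-in types:

// Mirrors the guard added to each push site above.
#[derive(PartialEq)]
enum Visibility {
    Public,
    Restricted,
}

struct Collector {
    collect_pub_only: bool,
    names: Vec<String>,
}

impl Collector {
    fn push_decl(&mut self, name: &str, vis: Visibility) {
        if self.collect_pub_only && vis != Visibility::Public {
            return; // library indexes skip non-public declarations
        }
        self.names.push(name.to_owned());
    }
}

fn main() {
    let mut c = Collector { collect_pub_only: true, names: Vec::new() };
    c.push_decl("pub_fn", Visibility::Public);
    c.push_decl("private_fn", Visibility::Restricted);
    assert_eq!(c.names, ["pub_fn"]);
}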

View file

@ -1,7 +1,14 @@
use ide_db::syntax_helpers::node_ext::for_each_break_and_continue_expr;
use ide_db::{
source_change::SourceChangeBuilder, syntax_helpers::node_ext::for_each_break_and_continue_expr,
};
use syntax::{
T,
ast::{self, AstNode, HasLoopBody},
SyntaxToken, T,
ast::{
self, AstNode, HasLoopBody,
make::{self, tokens},
syntax_factory::SyntaxFactory,
},
syntax_editor::{Position, SyntaxEditor},
};
use crate::{AssistContext, AssistId, Assists};
@ -21,9 +28,9 @@ use crate::{AssistContext, AssistId, Assists};
// ->
// ```
// fn main() {
// 'l: loop {
// break 'l;
// continue 'l;
// ${1:'l}: loop {
// break ${2:'l};
// continue ${0:'l};
// }
// }
// ```
@ -39,30 +46,56 @@ pub(crate) fn add_label_to_loop(acc: &mut Assists, ctx: &AssistContext<'_>) -> O
"Add Label",
loop_expr.syntax().text_range(),
|builder| {
builder.insert(loop_kw.text_range().start(), "'l: ");
let make = SyntaxFactory::with_mappings();
let mut editor = builder.make_editor(loop_expr.syntax());
let label = make.lifetime("'l");
let elements = vec![
label.syntax().clone().into(),
make::token(T![:]).into(),
tokens::single_space().into(),
];
editor.insert_all(Position::before(&loop_kw), elements);
if let Some(cap) = ctx.config.snippet_cap {
editor.add_annotation(label.syntax(), builder.make_placeholder_snippet(cap));
}
let loop_body = loop_expr.loop_body().and_then(|it| it.stmt_list());
for_each_break_and_continue_expr(
loop_expr.label(),
loop_body,
&mut |expr| match expr {
ast::Expr::BreakExpr(break_expr) => {
if let Some(break_token) = break_expr.break_token() {
builder.insert(break_token.text_range().end(), " 'l")
}
}
ast::Expr::ContinueExpr(continue_expr) => {
if let Some(continue_token) = continue_expr.continue_token() {
builder.insert(continue_token.text_range().end(), " 'l")
}
}
_ => {}
},
);
for_each_break_and_continue_expr(loop_expr.label(), loop_body, &mut |expr| {
let token = match expr {
ast::Expr::BreakExpr(break_expr) => break_expr.break_token(),
ast::Expr::ContinueExpr(continue_expr) => continue_expr.continue_token(),
_ => return,
};
if let Some(token) = token {
insert_label_after_token(&mut editor, &make, &token, ctx, builder);
}
});
editor.add_mappings(make.finish_with_mappings());
builder.add_file_edits(ctx.vfs_file_id(), editor);
builder.rename();
},
)
}
fn insert_label_after_token(
editor: &mut SyntaxEditor,
make: &SyntaxFactory,
token: &SyntaxToken,
ctx: &AssistContext<'_>,
builder: &mut SourceChangeBuilder,
) {
let label = make.lifetime("'l");
let elements = vec![tokens::single_space().into(), label.syntax().clone().into()];
editor.insert_all(Position::after(token), elements);
if let Some(cap) = ctx.config.snippet_cap {
editor.add_annotation(label.syntax(), builder.make_placeholder_snippet(cap));
}
}
#[cfg(test)]
mod tests {
use crate::tests::{check_assist, check_assist_not_applicable};
@ -82,9 +115,9 @@ fn main() {
}"#,
r#"
fn main() {
'l: loop {
break 'l;
continue 'l;
${1:'l}: loop {
break ${2:'l};
continue ${0:'l};
}
}"#,
);
@ -107,9 +140,9 @@ fn main() {
}"#,
r#"
fn main() {
'l: loop {
break 'l;
continue 'l;
${1:'l}: loop {
break ${2:'l};
continue ${0:'l};
loop {
break;
continue;
@ -139,9 +172,9 @@ fn main() {
loop {
break;
continue;
'l: loop {
break 'l;
continue 'l;
${1:'l}: loop {
break ${2:'l};
continue ${0:'l};
}
}
}"#,

View file

@ -124,40 +124,37 @@ pub(crate) fn apply_demorgan(acc: &mut Assists, ctx: &AssistContext<'_>) -> Opti
op_range,
|builder| {
let make = SyntaxFactory::with_mappings();
let paren_expr = bin_expr.syntax().parent().and_then(ast::ParenExpr::cast);
let neg_expr = paren_expr
.clone()
let (target_node, result_expr) = if let Some(neg_expr) = bin_expr
.syntax()
.parent()
.and_then(ast::ParenExpr::cast)
.and_then(|paren_expr| paren_expr.syntax().parent())
.and_then(ast::PrefixExpr::cast)
.filter(|prefix_expr| matches!(prefix_expr.op_kind(), Some(ast::UnaryOp::Not)))
.map(ast::Expr::PrefixExpr);
let mut editor;
if let Some(paren_expr) = paren_expr {
if let Some(neg_expr) = neg_expr {
cov_mark::hit!(demorgan_double_negation);
let parent = neg_expr.syntax().parent();
editor = builder.make_editor(neg_expr.syntax());
if parent.is_some_and(|parent| {
demorganed.needs_parens_in_place_of(&parent, neg_expr.syntax())
}) {
cov_mark::hit!(demorgan_keep_parens_for_op_precedence2);
editor.replace(neg_expr.syntax(), make.expr_paren(demorganed).syntax());
} else {
editor.replace(neg_expr.syntax(), demorganed.syntax());
};
} else {
cov_mark::hit!(demorgan_double_parens);
editor = builder.make_editor(paren_expr.syntax());
editor.replace(paren_expr.syntax(), add_bang_paren(&make, demorganed).syntax());
}
{
cov_mark::hit!(demorgan_double_negation);
(ast::Expr::from(neg_expr).syntax().clone(), demorganed)
} else if let Some(paren_expr) =
bin_expr.syntax().parent().and_then(ast::ParenExpr::cast)
{
cov_mark::hit!(demorgan_double_parens);
(paren_expr.syntax().clone(), add_bang_paren(&make, demorganed))
} else {
editor = builder.make_editor(bin_expr.syntax());
editor.replace(bin_expr.syntax(), add_bang_paren(&make, demorganed).syntax());
}
(bin_expr.syntax().clone(), add_bang_paren(&make, demorganed))
};
let final_expr = if target_node
.parent()
.is_some_and(|p| result_expr.needs_parens_in_place_of(&p, &target_node))
{
cov_mark::hit!(demorgan_keep_parens_for_op_precedence2);
make.expr_paren(result_expr).into()
} else {
result_expr
};
let mut editor = builder.make_editor(&target_node);
editor.replace(&target_node, final_expr.syntax());
editor.add_mappings(make.finish_with_mappings());
builder.add_file_edits(ctx.vfs_file_id(), editor);
},
@ -636,4 +633,31 @@ fn main() {
"#,
);
}
#[test]
fn demorgan_method_call_receiver() {
check_assist(
apply_demorgan,
"fn f() { (x ||$0 !y).then_some(42) }",
"fn f() { (!(!x && y)).then_some(42) }",
);
}
#[test]
fn demorgan_method_call_receiver_complex() {
check_assist(
apply_demorgan,
"fn f() { (a && b ||$0 c && d).then_some(42) }",
"fn f() { (!(!(a && b) && !(c && d))).then_some(42) }",
);
}
#[test]
fn demorgan_method_call_receiver_chained() {
check_assist(
apply_demorgan,
"fn f() { (a ||$0 b).then_some(42).or(Some(0)) }",
"fn f() { (!(!a && !b)).then_some(42).or(Some(0)) }",
);
}
}
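
The new receiver tests all hinge on the same Boolean identity; a quick exhaustive check of the equivalences the assist produces:

// De Morgan over all inputs, including the double-negated form the assist
// emits for method-call receivers like `(x || !y).then_some(42)`.
fn main() {
    for x in [false, true] {
        for y in [false, true] {
            assert_eq!(x || !y, !(!x && y)); // first new test
            assert_eq!(x || y, !(!x && !y)); // chained-call test
        }
    }
}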

View file

@ -1,15 +1,10 @@
use std::iter::successors;
use either::Either;
use ide_db::{
RootDatabase,
defs::NameClass,
syntax_helpers::node_ext::{is_pattern_cond, single_let},
ty_filter::TryEnum,
};
use ide_db::{RootDatabase, defs::NameClass, ty_filter::TryEnum};
use syntax::{
AstNode, Edition, T, TextRange,
AstNode, Edition, SyntaxKind, T, TextRange,
ast::{self, HasName, edit::IndentLevel, edit_in_place::Indent, syntax_factory::SyntaxFactory},
syntax_editor::SyntaxEditor,
};
use crate::{
@ -54,42 +49,46 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<'
return None;
}
let mut else_block = None;
let indent = if_expr.indent_level();
let if_exprs = successors(Some(if_expr.clone()), |expr| match expr.else_branch()? {
ast::ElseBranch::IfExpr(expr) => Some(expr),
ast::ElseBranch::Block(block) => {
let block = unwrap_trivial_block(block).clone_for_update();
block.reindent_to(IndentLevel(1));
else_block = Some(block);
None
}
});
let scrutinee_to_be_expr = if_expr.condition()?;
let scrutinee_to_be_expr = match single_let(scrutinee_to_be_expr.clone()) {
Some(cond) => cond.expr()?,
None => scrutinee_to_be_expr,
let scrutinee_to_be_expr = match let_and_guard(&scrutinee_to_be_expr) {
(Some(let_expr), _) => let_expr.expr()?,
(None, cond) => cond?,
};
let mut pat_seen = false;
let mut cond_bodies = Vec::new();
for if_expr in if_exprs {
let cond = if_expr.condition()?;
let cond = match single_let(cond.clone()) {
Some(let_) => {
let (cond, guard) = match let_and_guard(&cond) {
(None, guard) => (None, Some(guard?)),
(Some(let_), guard) => {
let pat = let_.pat()?;
let expr = let_.expr()?;
// FIXME: If one `let` is wrapped in parentheses and the second is not,
// we'll exit here.
if scrutinee_to_be_expr.syntax().text() != expr.syntax().text() {
// Only if all condition expressions are equal we can merge them into a match
return None;
}
pat_seen = true;
Either::Left(pat)
(Some(pat), guard)
}
// Multiple `let`, unsupported.
None if is_pattern_cond(cond.clone()) => return None,
None => Either::Right(cond),
};
let body = if_expr.then_branch()?;
cond_bodies.push((cond, body));
if let Some(guard) = &guard {
guard.dedent(indent);
guard.indent(IndentLevel(1));
}
let body = if_expr.then_branch()?.clone_for_update();
body.indent(IndentLevel(1));
cond_bodies.push((cond, guard, body));
}
if !pat_seen && cond_bodies.len() != 1 {
@ -106,27 +105,25 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<'
available_range,
move |builder| {
let make = SyntaxFactory::with_mappings();
let match_expr = {
let match_expr: ast::Expr = {
let else_arm = make_else_arm(ctx, &make, else_block, &cond_bodies);
let make_match_arm = |(pat, body): (_, ast::BlockExpr)| {
let body = make.block_expr(body.statements(), body.tail_expr());
body.indent(IndentLevel::from(1));
let body = unwrap_trivial_block(body);
match pat {
Either::Left(pat) => make.match_arm(pat, None, body),
Either::Right(_) if !pat_seen => {
make.match_arm(make.literal_pat("true").into(), None, body)
let make_match_arm =
|(pat, guard, body): (_, Option<ast::Expr>, ast::BlockExpr)| {
body.reindent_to(IndentLevel::single());
let body = unwrap_trivial_block(body);
match (pat, guard.map(|it| make.match_guard(it))) {
(Some(pat), guard) => make.match_arm(pat, guard, body),
(None, _) if !pat_seen => {
make.match_arm(make.literal_pat("true").into(), None, body)
}
(None, guard) => {
make.match_arm(make.wildcard_pat().into(), guard, body)
}
}
Either::Right(expr) => make.match_arm(
make.wildcard_pat().into(),
Some(make.match_guard(expr)),
body,
),
}
};
};
let arms = cond_bodies.into_iter().map(make_match_arm).chain([else_arm]);
let match_expr = make.expr_match(scrutinee_to_be_expr, make.match_arm_list(arms));
match_expr.indent(IndentLevel::from_node(if_expr.syntax()));
match_expr.indent(indent);
match_expr.into()
};
@ -134,7 +131,11 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<'
if_expr.syntax().parent().is_some_and(|it| ast::IfExpr::can_cast(it.kind()));
let expr = if has_preceding_if_expr {
// make sure we replace the `else if let ...` with a block so we don't end up with `else expr`
make.block_expr([], Some(match_expr)).into()
match_expr.dedent(indent);
match_expr.indent(IndentLevel(1));
let block_expr = make.block_expr([], Some(match_expr));
block_expr.indent(indent);
block_expr.into()
} else {
match_expr
};
@ -150,13 +151,13 @@ pub(crate) fn replace_if_let_with_match(acc: &mut Assists, ctx: &AssistContext<'
fn make_else_arm(
ctx: &AssistContext<'_>,
make: &SyntaxFactory,
else_block: Option<ast::BlockExpr>,
conditionals: &[(Either<ast::Pat, ast::Expr>, ast::BlockExpr)],
else_expr: Option<ast::Expr>,
conditionals: &[(Option<ast::Pat>, Option<ast::Expr>, ast::BlockExpr)],
) -> ast::MatchArm {
let (pattern, expr) = if let Some(else_block) = else_block {
let (pattern, expr) = if let Some(else_expr) = else_expr {
let pattern = match conditionals {
[(Either::Right(_), _)] => make.literal_pat("false").into(),
[(Either::Left(pat), _)] => match ctx
[(None, Some(_), _)] => make.literal_pat("false").into(),
[(Some(pat), _, _)] => match ctx
.sema
.type_of_pat(pat)
.and_then(|ty| TryEnum::from_ty(&ctx.sema, &ty.adjusted()))
@ -174,10 +175,10 @@ fn make_else_arm(
},
_ => make.wildcard_pat().into(),
};
(pattern, unwrap_trivial_block(else_block))
(pattern, else_expr)
} else {
let pattern = match conditionals {
[(Either::Right(_), _)] => make.literal_pat("false").into(),
[(None, Some(_), _)] => make.literal_pat("false").into(),
_ => make.wildcard_pat().into(),
};
(pattern, make.expr_unit())
@ -266,7 +267,10 @@ pub(crate) fn replace_match_with_if_let(acc: &mut Assists, ctx: &AssistContext<'
// wrap them in another BlockExpr.
match expr {
ast::Expr::BlockExpr(block) if block.modifier().is_none() => block,
expr => make.block_expr([], Some(expr)),
expr => {
expr.indent(IndentLevel(1));
make.block_expr([], Some(expr))
}
}
};
@ -289,7 +293,9 @@ pub(crate) fn replace_match_with_if_let(acc: &mut Assists, ctx: &AssistContext<'
condition
};
let then_expr = then_expr.clone_for_update();
let else_expr = else_expr.clone_for_update();
then_expr.reindent_to(IndentLevel::single());
else_expr.reindent_to(IndentLevel::single());
let then_block = make_block_expr(then_expr);
let else_expr = if is_empty_expr(&else_expr) { None } else { Some(else_expr) };
let if_let_expr = make.expr_if(
@ -382,6 +388,48 @@ fn is_sad_pat(sema: &hir::Semantics<'_, RootDatabase>, pat: &ast::Pat) -> bool {
.is_some_and(|it| does_pat_match_variant(pat, &it.sad_pattern()))
}
fn let_and_guard(cond: &ast::Expr) -> (Option<ast::LetExpr>, Option<ast::Expr>) {
if let ast::Expr::ParenExpr(expr) = cond
&& let Some(sub_expr) = expr.expr()
{
let_and_guard(&sub_expr)
} else if let ast::Expr::LetExpr(let_expr) = cond {
(Some(let_expr.clone()), None)
} else if let ast::Expr::BinExpr(bin_expr) = cond
&& let Some(ast::Expr::LetExpr(let_expr)) = and_bin_expr_left(bin_expr).lhs()
{
let new_expr = bin_expr.clone_subtree();
let mut edit = SyntaxEditor::new(new_expr.syntax().clone());
let left_bin = and_bin_expr_left(&new_expr);
if let Some(rhs) = left_bin.rhs() {
edit.replace(left_bin.syntax(), rhs.syntax());
} else {
if let Some(next) = left_bin.syntax().next_sibling_or_token()
&& next.kind() == SyntaxKind::WHITESPACE
{
edit.delete(next);
}
edit.delete(left_bin.syntax());
}
let new_expr = edit.finish().new_root().clone();
(Some(let_expr), ast::Expr::cast(new_expr))
} else {
(None, Some(cond.clone()))
}
}
fn and_bin_expr_left(expr: &ast::BinExpr) -> ast::BinExpr {
if expr.op_kind() == Some(ast::BinaryOp::LogicOp(ast::LogicOp::And))
&& let Some(ast::Expr::BinExpr(left)) = expr.lhs()
{
and_bin_expr_left(&left)
} else {
expr.clone()
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -452,6 +500,45 @@ pub fn foo(foo: bool) {
)
}
#[test]
fn test_if_with_match_comments() {
check_assist(
replace_if_let_with_match,
r#"
pub fn foo(foo: i32) {
$0if let 1 = foo {
// some comment
self.foo();
} else if let 2 = foo {
// some comment 2
self.bar()
} else {
// some comment 3
self.baz();
}
}
"#,
r#"
pub fn foo(foo: i32) {
match foo {
1 => {
// some comment
self.foo();
}
2 => {
// some comment 2
self.bar()
}
_ => {
// some comment 3
self.baz();
}
}
}
"#,
)
}
#[test]
fn test_if_let_with_match_no_else() {
check_assist(
@ -514,14 +601,151 @@ impl VariantData {
#[test]
fn test_if_let_with_match_let_chain() {
check_assist_not_applicable(
check_assist(
replace_if_let_with_match,
r#"
#![feature(if_let_guard)]
fn main() {
if $0let true = true && let Some(1) = None {} else { other() }
}
"#,
r#"
#![feature(if_let_guard)]
fn main() {
match true {
true if let Some(1) = None => {}
_ => other(),
}
}
"#,
);
check_assist(
replace_if_let_with_match,
r#"
#![feature(if_let_guard)]
fn main() {
if true {
$0if let ParenExpr(expr) = cond
&& let Some(sub_expr) = expr.expr()
{
branch1(
"..."
)
} else if let LetExpr(let_expr) = cond {
branch2(
"..."
)
} else if let BinExpr(bin_expr) = cond
&& let Some(kind) = bin_expr.op_kind()
&& let Some(LetExpr(let_expr)) = foo(bin_expr)
{
branch3()
} else {
branch4(
"..."
)
}
}
}
"#,
r#"
#![feature(if_let_guard)]
fn main() {
if true {
match cond {
ParenExpr(expr) if let Some(sub_expr) = expr.expr() => {
branch1(
"..."
)
}
LetExpr(let_expr) => {
branch2(
"..."
)
}
BinExpr(bin_expr) if let Some(kind) = bin_expr.op_kind()
&& let Some(LetExpr(let_expr)) = foo(bin_expr) => branch3(),
_ => {
branch4(
"..."
)
}
}
}
}
"#,
);
check_assist(
replace_if_let_with_match,
r#"
fn main() {
if $0let true = true
&& true
&& false
{
code()
} else {
other()
}
}
"#,
r#"
fn main() {
match true {
true if true
&& false => code(),
_ => other(),
}
}
"#,
);
}
#[test]
fn test_if_let_with_match_let_chain_no_else() {
check_assist(
replace_if_let_with_match,
r#"
#![feature(if_let_guard)]
fn main() {
if $0let true = true && let Some(1) = None {}
}
"#,
)
r#"
#![feature(if_let_guard)]
fn main() {
match true {
true if let Some(1) = None => {}
_ => (),
}
}
"#,
);
check_assist(
replace_if_let_with_match,
r#"
fn main() {
if $0let true = true
&& true
&& false
{
code()
}
}
"#,
r#"
fn main() {
match true {
true if true
&& false => code(),
_ => (),
}
}
"#,
);
}
#[test]
@ -553,10 +777,10 @@ impl VariantData {
VariantData::Tuple(..) => false,
_ if cond() => true,
_ => {
bar(
123
)
}
bar(
123
)
}
}
}
}
@ -587,11 +811,11 @@ impl VariantData {
if let VariantData::Struct(..) = *self {
true
} else {
match *self {
VariantData::Tuple(..) => false,
_ => false,
match *self {
VariantData::Tuple(..) => false,
_ => false,
}
}
}
}
}
"#,
@ -706,9 +930,12 @@ fn foo(x: Result<i32, ()>) {
fn main() {
if true {
$0if let Ok(rel_path) = path.strip_prefix(root_path) {
let rel_path = RelativePathBuf::from_path(rel_path).ok()?;
let rel_path = RelativePathBuf::from_path(rel_path)
.ok()?;
Some((*id, rel_path))
} else {
let _ = some_code()
.clone();
None
}
}
@ -719,10 +946,52 @@ fn main() {
if true {
match path.strip_prefix(root_path) {
Ok(rel_path) => {
let rel_path = RelativePathBuf::from_path(rel_path).ok()?;
let rel_path = RelativePathBuf::from_path(rel_path)
.ok()?;
Some((*id, rel_path))
}
_ => None,
_ => {
let _ = some_code()
.clone();
None
}
}
}
}
"#,
);
check_assist(
replace_if_let_with_match,
r#"
fn main() {
if true {
$0if let Ok(rel_path) = path.strip_prefix(root_path) {
Foo {
x: 1
}
} else {
Foo {
x: 2
}
}
}
}
"#,
r#"
fn main() {
if true {
match path.strip_prefix(root_path) {
Ok(rel_path) => {
Foo {
x: 1
}
}
_ => {
Foo {
x: 2
}
}
}
}
}
@ -1583,11 +1852,12 @@ fn foo(x: Result<i32, ()>) {
fn main() {
if true {
$0match path.strip_prefix(root_path) {
Ok(rel_path) => {
let rel_path = RelativePathBuf::from_path(rel_path).ok()?;
Some((*id, rel_path))
Ok(rel_path) => Foo {
x: 2
}
_ => None,
_ => Foo {
x: 3
},
}
}
}
@ -1596,15 +1866,55 @@ fn main() {
fn main() {
if true {
if let Ok(rel_path) = path.strip_prefix(root_path) {
let rel_path = RelativePathBuf::from_path(rel_path).ok()?;
Foo {
x: 2
}
} else {
Foo {
x: 3
}
}
}
}
"#,
);
check_assist(
replace_match_with_if_let,
r#"
fn main() {
if true {
$0match path.strip_prefix(root_path) {
Ok(rel_path) => {
let rel_path = RelativePathBuf::from_path(rel_path)
.ok()?;
Some((*id, rel_path))
}
_ => {
let _ = some_code()
.clone();
None
},
}
}
}
"#,
r#"
fn main() {
if true {
if let Ok(rel_path) = path.strip_prefix(root_path) {
let rel_path = RelativePathBuf::from_path(rel_path)
.ok()?;
Some((*id, rel_path))
} else {
let _ = some_code()
.clone();
None
}
}
}
"#,
)
);
}
#[test]

View file

@ -183,9 +183,9 @@ fn main() {
"#####,
r#####"
fn main() {
'l: loop {
break 'l;
continue 'l;
${1:'l}: loop {
break ${2:'l};
continue ${0:'l};
}
}
"#####,

View file

@ -57,6 +57,14 @@ pub fn extract_trivial_expression(block_expr: &ast::BlockExpr) -> Option<ast::Ex
});
non_trivial_children.next().is_some()
};
if stmt_list
.syntax()
.children_with_tokens()
.filter_map(NodeOrToken::into_token)
.any(|token| token.kind() == syntax::SyntaxKind::COMMENT)
{
return None;
}
if let Some(expr) = stmt_list.tail_expr() {
if has_anything_else(expr.syntax()) {
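
The guard added above keeps comments alive: a block no longer counts as trivial if any COMMENT token appears among the statement list's children, because unwrapping it to its tail expression would drop the comment. The failure mode it prevents, in plain Rust:

// If `{ /* keep me */ 1 }` counted as trivial, rewrites would reduce it to
// `1` and silently lose the comment.
fn main() {
    let x = {
        // keep me: this comment must survive any block-unwrapping rewrite
        1
    };
    assert_eq!(x, 1);
}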

View file

@ -13,7 +13,7 @@ use ide_db::{
};
use stdx::never;
use syntax::{
SyntaxKind::{BLOCK_EXPR, EXPR_STMT, FOR_EXPR, IF_EXPR, LOOP_EXPR, STMT_LIST, WHILE_EXPR},
SyntaxKind::{EXPR_STMT, STMT_LIST},
T, TextRange, TextSize,
ast::{self, AstNode, AstToken},
match_ast,
@ -253,18 +253,15 @@ pub(crate) fn complete_postfix(
}
}
let mut block_should_be_wrapped = true;
if dot_receiver.syntax().kind() == BLOCK_EXPR {
block_should_be_wrapped = false;
if let Some(parent) = dot_receiver.syntax().parent()
&& matches!(parent.kind(), IF_EXPR | WHILE_EXPR | LOOP_EXPR | FOR_EXPR)
{
block_should_be_wrapped = true;
}
let block_should_be_wrapped = if let ast::Expr::BlockExpr(block) = dot_receiver {
block.modifier().is_some() || !block.is_standalone()
} else {
true
};
{
let (open_brace, close_brace) =
if block_should_be_wrapped { ("{ ", " }") } else { ("", "") };
// FIXME: Why add parentheses
let (open_paren, close_paren) = if is_in_cond { ("(", ")") } else { ("", "") };
let unsafe_completion_string =
format!("{open_paren}unsafe {open_brace}{receiver_text}{close_brace}{close_paren}");
@ -842,6 +839,20 @@ fn main() {
&format!("fn main() {{ let x = {kind} {{ if true {{1}} else {{2}} }} }}"),
);
if kind == "const" {
check_edit(
kind,
r#"fn main() { unsafe {1}.$0 }"#,
&format!("fn main() {{ {kind} {{ unsafe {{1}} }} }}"),
);
} else {
check_edit(
kind,
r#"fn main() { const {1}.$0 }"#,
&format!("fn main() {{ {kind} {{ const {{1}} }} }}"),
);
}
// completion will not be triggered
check_edit(
kind,
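
The `is_standalone`/modifier check decides whether the receiver block must be re-wrapped in braces. The new tests correspond to code that is valid Rust on its own (inline `const` blocks are stable since 1.79):

// Shapes exercised above: completing `const` on `unsafe {1}.$0` yields
// `const { unsafe {1} }`, and completing `unsafe` on `const {1}.$0` yields
// `unsafe { const {1} }`.
#[allow(unused_unsafe)]
fn main() {
    let a = const { unsafe { 1 } };
    let b = unsafe { const { 1 } };
    assert_eq!(a + b, 2);
}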

View file

@ -3,10 +3,12 @@
use base_db::SourceRootId;
use profile::Bytes;
use rustc_hash::FxHashSet;
use salsa::{Database as _, Durability};
use triomphe::Arc;
use salsa::{Database as _, Durability, Setter as _};
use crate::{ChangeWithProcMacros, RootDatabase, symbol_index::SymbolsDatabase};
use crate::{
ChangeWithProcMacros, RootDatabase,
symbol_index::{LibraryRoots, LocalRoots},
};
impl RootDatabase {
pub fn request_cancellation(&mut self) {
@ -29,8 +31,8 @@ impl RootDatabase {
local_roots.insert(root_id);
}
}
self.set_local_roots_with_durability(Arc::new(local_roots), Durability::MEDIUM);
self.set_library_roots_with_durability(Arc::new(library_roots), Durability::MEDIUM);
LocalRoots::get(self).set_roots(self).to(local_roots);
LibraryRoots::get(self).set_roots(self).to(library_roots);
}
change.apply(self);
}
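
This migration swaps the old `#[salsa::input]` query-group methods for salsa singleton input structs; the generated accessors (`get`, `set_*(..).to(..)`, `builder(..).durability(..).new(..)`) are the ones used throughout the diff. A minimal sketch of the pattern, assuming a recent salsa with its bundled `DatabaseImpl` (a `u32` field stands in for the real `FxHashSet<SourceRootId>`):

use salsa::Setter as _;

// A singleton input has exactly one instance per database, so `get` can
// retrieve it without threading a handle around.
#[salsa::input(singleton)]
struct LocalRoots {
    roots: u32,
}

fn main() {
    let mut db = salsa::DatabaseImpl::default();
    let input = LocalRoots::new(&db, 1);
    assert_eq!(input.roots(&db), 1);
    // Updating through the generated setter, as `apply_change` now does:
    LocalRoots::get(&db).set_roots(&mut db).to(2);
    assert_eq!(LocalRoots::get(&db).roots(&db), 2);
}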

View file

@ -10,7 +10,7 @@ use hir::{Complete, Crate, ItemInNs, Module, import_map};
use crate::{
RootDatabase,
imports::import_assets::NameToImport,
symbol_index::{self, SymbolsDatabase as _},
symbol_index::{self, SymbolIndex},
};
/// A value to use, when uncertain which limit to pick.
@ -110,7 +110,7 @@ pub fn items_with_name_in_module<T>(
local_query
}
};
local_query.search(&[db.module_symbols(module)], |local_candidate| {
local_query.search(&[SymbolIndex::module_symbols(db, module)], |local_candidate| {
cb(match local_candidate.def {
hir::ModuleDef::Macro(macro_def) => ItemInNs::Macros(macro_def),
def => ItemInNs::from(def),

View file

@ -64,7 +64,7 @@ use hir::{
};
use triomphe::Arc;
use crate::{line_index::LineIndex, symbol_index::SymbolsDatabase};
use crate::line_index::LineIndex;
pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
pub use ::line_index;
@ -195,8 +195,12 @@ impl RootDatabase {
db.set_all_crates(Arc::new(Box::new([])));
CrateGraphBuilder::default().set_in_db(&mut db);
db.set_proc_macros_with_durability(Default::default(), Durability::MEDIUM);
db.set_local_roots_with_durability(Default::default(), Durability::MEDIUM);
db.set_library_roots_with_durability(Default::default(), Durability::MEDIUM);
_ = crate::symbol_index::LibraryRoots::builder(Default::default())
.durability(Durability::MEDIUM)
.new(&db);
_ = crate::symbol_index::LocalRoots::builder(Default::default())
.durability(Durability::MEDIUM)
.new(&db);
db.set_expand_proc_attr_macros_with_durability(false, Durability::HIGH);
db.update_base_query_lru_capacities(lru_capacity);
db

View file

@ -11,7 +11,7 @@ use salsa::{Cancelled, Database};
use crate::{
FxIndexMap, RootDatabase,
base_db::{Crate, RootQueryDb},
symbol_index::SymbolsDatabase,
symbol_index::SymbolIndex,
};
/// We're indexing many crates.
@ -107,8 +107,9 @@ pub fn parallel_prime_caches(
Ok::<_, crossbeam_channel::SendError<_>>(())
};
let handle_symbols = |module| {
let cancelled =
Cancelled::catch(AssertUnwindSafe(|| _ = db.module_symbols(module)));
let cancelled = Cancelled::catch(AssertUnwindSafe(|| {
_ = SymbolIndex::module_symbols(&db, module)
}));
match cancelled {
Ok(()) => progress_sender

View file

@ -2,7 +2,7 @@
use std::hash::{BuildHasher, Hash};
use hir::{CfgExpr, FilePositionWrapper, FileRangeWrapper, Semantics};
use hir::{CfgExpr, FilePositionWrapper, FileRangeWrapper, Semantics, Symbol};
use smallvec::SmallVec;
use span::{TextRange, TextSize};
use syntax::{
@ -524,6 +524,7 @@ impl_empty_upmap_from_ra_fixture!(
f64,
&str,
String,
Symbol,
SmolStr,
Documentation,
SymbolKind,

View file

@ -27,7 +27,7 @@ use std::{
ops::ControlFlow,
};
use base_db::{RootQueryDb, SourceDatabase, SourceRootId};
use base_db::{RootQueryDb, SourceRootId};
use fst::{Automaton, Streamer, raw::IndexedValue};
use hir::{
Crate, Module,
@ -37,7 +37,6 @@ use hir::{
};
use rayon::prelude::*;
use rustc_hash::FxHashSet;
use triomphe::Arc;
use crate::RootDatabase;
@ -102,63 +101,26 @@ impl Query {
}
}
#[query_group::query_group]
pub trait SymbolsDatabase: HirDatabase + SourceDatabase {
/// The symbol index for a given module. These modules should only be in source roots that
/// are inside local_roots.
// FIXME: Is it worth breaking the encapsulation boundary of `hir`, and make this take a `ModuleId`,
// in order for it to be a non-interned query?
#[salsa::invoke_interned(module_symbols)]
fn module_symbols(&self, module: Module) -> Arc<SymbolIndex>;
/// The symbol index for a given source root within library_roots.
#[salsa::invoke_interned(library_symbols)]
fn library_symbols(&self, source_root_id: SourceRootId) -> Arc<SymbolIndex>;
#[salsa::transparent]
/// The symbol indices of modules that make up a given crate.
fn crate_symbols(&self, krate: Crate) -> Box<[Arc<SymbolIndex>]>;
/// The set of "local" (that is, from the current workspace) roots.
/// Files in local roots are assumed to change frequently.
#[salsa::input]
fn local_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
/// The set of roots for crates.io libraries.
/// Files in libraries are assumed to never change.
#[salsa::input]
fn library_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
/// The set of roots for crates.io libraries.
/// Files in libraries are assumed to never change.
#[salsa::input(singleton, debug)]
pub struct LibraryRoots {
#[returns(ref)]
pub roots: FxHashSet<SourceRootId>,
}
fn library_symbols(db: &dyn SymbolsDatabase, source_root_id: SourceRootId) -> Arc<SymbolIndex> {
let _p = tracing::info_span!("library_symbols").entered();
// We call this without attaching because this runs in parallel, so we need to attach here.
hir::attach_db(db, || {
let mut symbol_collector = SymbolCollector::new(db);
db.source_root_crates(source_root_id)
.iter()
.flat_map(|&krate| Crate::from(krate).modules(db))
// we specifically avoid calling other SymbolsDatabase queries here, even though they do the same thing,
// as the index for a library is not going to really ever change, and we do not want to store
// the module or crate indices for those in salsa unless we need to.
.for_each(|module| symbol_collector.collect(module));
Arc::new(SymbolIndex::new(symbol_collector.finish()))
})
/// The set of "local" (that is, from the current workspace) roots.
/// Files in local roots are assumed to change frequently.
#[salsa::input(singleton, debug)]
pub struct LocalRoots {
#[returns(ref)]
pub roots: FxHashSet<SourceRootId>,
}
fn module_symbols(db: &dyn SymbolsDatabase, module: Module) -> Arc<SymbolIndex> {
let _p = tracing::info_span!("module_symbols").entered();
// We call this without attaching because this runs in parallel, so we need to attach here.
hir::attach_db(db, || Arc::new(SymbolIndex::new(SymbolCollector::new_module(db, module))))
}
pub fn crate_symbols(db: &dyn SymbolsDatabase, krate: Crate) -> Box<[Arc<SymbolIndex>]> {
/// The symbol indices of modules that make up a given crate.
pub fn crate_symbols(db: &dyn HirDatabase, krate: Crate) -> Box<[&SymbolIndex]> {
let _p = tracing::info_span!("crate_symbols").entered();
krate.modules(db).into_iter().map(|module| db.module_symbols(module)).collect()
krate.modules(db).into_iter().map(|module| SymbolIndex::module_symbols(db, module)).collect()
}
// Feature: Workspace Symbol
@ -190,20 +152,26 @@ pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> {
let _p = tracing::info_span!("world_symbols", query = ?query.query).entered();
let indices: Vec<_> = if query.libs {
db.library_roots()
LibraryRoots::get(db)
.roots(db)
.par_iter()
.map_with(db.clone(), |snap, &root| snap.library_symbols(root))
.for_each_with(db.clone(), |snap, &root| _ = SymbolIndex::library_symbols(snap, root));
LibraryRoots::get(db)
.roots(db)
.iter()
.map(|&root| SymbolIndex::library_symbols(db, root))
.collect()
} else {
let mut crates = Vec::new();
for &root in db.local_roots().iter() {
for &root in LocalRoots::get(db).roots(db).iter() {
crates.extend(db.source_root_crates(root).iter().copied())
}
let indices: Vec<_> = crates
.into_par_iter()
.map_with(db.clone(), |snap, krate| snap.crate_symbols(krate.into()))
.collect();
crates
.par_iter()
.for_each_with(db.clone(), |snap, &krate| _ = crate_symbols(snap, krate.into()));
let indices: Vec<_> =
crates.into_iter().map(|krate| crate_symbols(db, krate.into())).collect();
indices.iter().flat_map(|indices| indices.iter().cloned()).collect()
};
@ -221,6 +189,67 @@ pub struct SymbolIndex {
map: fst::Map<Vec<u8>>,
}
impl SymbolIndex {
/// The symbol index for a given source root within library_roots.
pub fn library_symbols(db: &dyn HirDatabase, source_root_id: SourceRootId) -> &SymbolIndex {
// FIXME:
#[salsa::interned]
struct InternedSourceRootId {
id: SourceRootId,
}
#[salsa::tracked(returns(ref))]
fn library_symbols(
db: &dyn HirDatabase,
source_root_id: InternedSourceRootId<'_>,
) -> SymbolIndex {
let _p = tracing::info_span!("library_symbols").entered();
// We call this without attaching because this runs in parallel, so we need to attach here.
hir::attach_db(db, || {
let mut symbol_collector = SymbolCollector::new(db, true);
db.source_root_crates(source_root_id.id(db))
.iter()
.flat_map(|&krate| Crate::from(krate).modules(db))
// we specifically avoid calling other SymbolsDatabase queries here, even though they do the same thing,
// as the index for a library is not going to really ever change, and we do not want to store
// the module or crate indices for those in salsa unless we need to.
.for_each(|module| symbol_collector.collect(module));
SymbolIndex::new(symbol_collector.finish())
})
}
library_symbols(db, InternedSourceRootId::new(db, source_root_id))
}
/// The symbol index for a given module. These modules should only be in source roots that
/// are inside local_roots.
pub fn module_symbols(db: &dyn HirDatabase, module: Module) -> &SymbolIndex {
// FIXME:
#[salsa::interned]
struct InternedModuleId {
id: hir::ModuleId,
}
#[salsa::tracked(returns(ref))]
fn module_symbols(db: &dyn HirDatabase, module: InternedModuleId<'_>) -> SymbolIndex {
let _p = tracing::info_span!("module_symbols").entered();
// We call this without attaching because this runs in parallel, so we need to attach here.
hir::attach_db(db, || {
let module: Module = module.id(db).into();
SymbolIndex::new(SymbolCollector::new_module(
db,
module,
!module.krate().origin(db).is_local(),
))
})
}
module_symbols(db, InternedModuleId::new(db, hir::ModuleId::from(module)))
}
}
impl fmt::Debug for SymbolIndex {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
@ -309,7 +338,7 @@ impl SymbolIndex {
impl Query {
pub(crate) fn search<'sym, T>(
self,
indices: &'sym [Arc<SymbolIndex>],
indices: &'sym [&SymbolIndex],
cb: impl FnMut(&'sym FileSymbol) -> ControlFlow<T>,
) -> Option<T> {
let _p = tracing::info_span!("symbol_index::Query::search").entered();
@ -344,7 +373,7 @@ impl Query {
fn search_maps<'sym, T>(
&self,
indices: &'sym [Arc<SymbolIndex>],
indices: &'sym [&SymbolIndex],
mut stream: fst::map::Union<'_>,
mut cb: impl FnMut(&'sym FileSymbol) -> ControlFlow<T>,
) -> Option<T> {
@ -397,7 +426,7 @@ impl Query {
mod tests {
use expect_test::expect_file;
use salsa::Durability;
use salsa::Setter;
use test_fixture::{WORKSPACE, WithFixture};
use super::*;
@ -484,7 +513,7 @@ pub(self) use crate::Trait as IsThisJustATrait;
.modules(&db)
.into_iter()
.map(|module_id| {
let mut symbols = SymbolCollector::new_module(&db, module_id);
let mut symbols = SymbolCollector::new_module(&db, module_id, false);
symbols.sort_by_key(|it| it.name.as_str().to_owned());
(module_id, symbols)
})
@ -511,7 +540,7 @@ struct Duplicate;
.modules(&db)
.into_iter()
.map(|module_id| {
let mut symbols = SymbolCollector::new_module(&db, module_id);
let mut symbols = SymbolCollector::new_module(&db, module_id, false);
symbols.sort_by_key(|it| it.name.as_str().to_owned());
(module_id, symbols)
})
@ -535,7 +564,7 @@ pub struct Foo;
let mut local_roots = FxHashSet::default();
local_roots.insert(WORKSPACE);
db.set_local_roots_with_durability(Arc::new(local_roots), Durability::HIGH);
LocalRoots::get(&db).set_roots(&mut db).to(local_roots);
let mut query = Query::new("Foo".to_owned());
let mut symbols = world_symbols(&db, query.clone());

View file

@ -80,7 +80,7 @@ pub use crate::{errors::SsrError, from_comment::ssr_from_comment, matching::Matc
use crate::{errors::bail, matching::MatchFailureReason};
use hir::{FileRange, Semantics};
use ide_db::symbol_index::SymbolsDatabase;
use ide_db::symbol_index::LocalRoots;
use ide_db::text_edit::TextEdit;
use ide_db::{EditionedFileId, FileId, FxHashMap, RootDatabase, base_db::SourceDatabase};
use resolving::ResolvedRule;
@ -138,8 +138,8 @@ impl<'db> MatchFinder<'db> {
/// Constructs an instance using the start of the first file in `db` as the lookup context.
pub fn at_first_file(db: &'db ide_db::RootDatabase) -> Result<MatchFinder<'db>, SsrError> {
if let Some(first_file_id) = db
.local_roots()
if let Some(first_file_id) = LocalRoots::get(db)
.roots(db)
.iter()
.next()
.and_then(|root| db.source_root(*root).source_root(db).iter().next())

View file

@ -9,6 +9,7 @@ use ide_db::{
EditionedFileId, FileId, FxHashSet,
defs::Definition,
search::{SearchScope, UsageSearchResult},
symbol_index::LocalRoots,
};
use syntax::{AstNode, SyntaxKind, SyntaxNode, ast};
@ -156,8 +157,7 @@ impl<'db> MatchFinder<'db> {
if self.restrict_ranges.is_empty() {
// Unrestricted search.
use ide_db::base_db::SourceDatabase;
use ide_db::symbol_index::SymbolsDatabase;
for &root in self.sema.db.local_roots().iter() {
for &root in LocalRoots::get(self.sema.db).roots(self.sema.db).iter() {
let sr = self.sema.db.source_root(root).source_root(self.sema.db);
for file_id in sr.iter() {
callback(file_id);

View file

@ -2,10 +2,10 @@ use expect_test::{Expect, expect};
use hir::{FilePosition, FileRange};
use ide_db::{
EditionedFileId, FxHashSet,
base_db::{SourceDatabase, salsa::Durability},
base_db::{SourceDatabase, salsa::Setter},
symbol_index::LocalRoots,
};
use test_utils::RangeOrOffset;
use triomphe::Arc;
use crate::{MatchFinder, SsrRule};
@ -66,7 +66,6 @@ fn parser_undefined_placeholder_in_replacement() {
/// `code` may optionally contain a cursor marker `$0`. If it doesn't, then the position will be
/// the start of the file. If there's a second cursor marker, then we'll return a single range.
pub(crate) fn single_file(code: &str) -> (ide_db::RootDatabase, FilePosition, Vec<FileRange>) {
use ide_db::symbol_index::SymbolsDatabase;
use test_fixture::{WORKSPACE, WithFixture};
let (mut db, file_id, range_or_offset) = if code.contains(test_utils::CURSOR_MARKER) {
ide_db::RootDatabase::with_range_or_offset(code)
@ -88,7 +87,7 @@ pub(crate) fn single_file(code: &str) -> (ide_db::RootDatabase, FilePosition, Ve
}
let mut local_roots = FxHashSet::default();
local_roots.insert(WORKSPACE);
db.set_local_roots_with_durability(Arc::new(local_roots), Durability::HIGH);
LocalRoots::get(&db).set_roots(&mut db).to(local_roots);
(db, position, selections)
}

View file

@ -245,7 +245,7 @@ fn try_lookup_include_path(
Some(NavigationTarget {
file_id,
full_range: TextRange::new(0.into(), size),
name: path.into(),
name: hir::Symbol::intern(&path),
alias: None,
focus_range: None,
kind: None,
@ -598,7 +598,13 @@ fn expr_to_nav(
let value_range = value.syntax().text_range();
let navs = navigation_target::orig_range_with_focus_r(db, file_id, value_range, focus_range);
navs.map(|(hir::FileRangeWrapper { file_id, range }, focus_range)| {
NavigationTarget::from_syntax(file_id, "<expr>".into(), focus_range, range, kind)
NavigationTarget::from_syntax(
file_id,
hir::Symbol::intern("<expr>"),
focus_range,
range,
kind,
)
})
}
@ -607,7 +613,6 @@ mod tests {
use crate::{GotoDefinitionConfig, fixture};
use ide_db::{FileRange, MiniCore};
use itertools::Itertools;
use syntax::SmolStr;
const TEST_CONFIG: GotoDefinitionConfig<'_> =
GotoDefinitionConfig { minicore: MiniCore::default() };
@ -658,7 +663,7 @@ mod tests {
let Some(target) = navs.into_iter().next() else {
panic!("expected single navigation target but encountered none");
};
assert_eq!(target.name, SmolStr::new_inline(expected_name));
assert_eq!(target.name, hir::Symbol::intern(expected_name));
}
#[test]

View file

@ -111,7 +111,8 @@ fn get_callable<'db>(
}
const INSIGNIFICANT_METHOD_NAMES: &[&str] = &["clone", "as_ref", "into"];
const INSIGNIFICANT_PARAMETER_NAMES: &[&str] = &["predicate", "value", "pat", "rhs", "other"];
const INSIGNIFICANT_PARAMETER_NAMES: &[&str] =
&["predicate", "value", "pat", "rhs", "other", "msg", "op"];
fn should_hide_param_name_hint(
sema: &Semantics<'_, RootDatabase>,

View file

@ -6,7 +6,8 @@ use arrayvec::ArrayVec;
use either::Either;
use hir::{
AssocItem, Crate, FieldSource, HasContainer, HasCrate, HasSource, HirDisplay, HirFileId,
InFile, LocalSource, ModuleSource, Semantics, db::ExpandDatabase, symbols::FileSymbol,
InFile, LocalSource, ModuleSource, Semantics, Symbol, db::ExpandDatabase, sym,
symbols::FileSymbol,
};
use ide_db::{
FileId, FileRange, RootDatabase, SymbolKind,
@ -16,12 +17,10 @@ use ide_db::{
famous_defs::FamousDefs,
ra_fixture::UpmapFromRaFixture,
};
use span::Edition;
use stdx::never;
use syntax::{
AstNode, SmolStr, SyntaxNode, TextRange, ToSmolStr,
AstNode, SyntaxNode, TextRange,
ast::{self, HasName},
format_smolstr,
};
/// `NavigationTarget` represents an element in the editor's UI which you can
@ -48,17 +47,14 @@ pub struct NavigationTarget {
///
/// This range must be contained within [`Self::full_range`].
pub focus_range: Option<TextRange>,
// FIXME: Symbol
pub name: SmolStr,
pub name: Symbol,
pub kind: Option<SymbolKind>,
// FIXME: Symbol
pub container_name: Option<SmolStr>,
pub container_name: Option<Symbol>,
pub description: Option<String>,
pub docs: Option<Documentation>,
/// In addition to a `name` field, a `NavigationTarget` may also be aliased
/// In such cases we want a `NavigationTarget` to be accessible by its alias
// FIXME: Symbol
pub alias: Option<SmolStr>,
pub alias: Option<Symbol>,
}
impl fmt::Debug for NavigationTarget {
@ -149,9 +145,7 @@ impl NavigationTarget {
db: &RootDatabase,
module: hir::Module,
) -> UpmappingResult<NavigationTarget> {
let edition = module.krate().edition(db);
let name =
module.name(db).map(|it| it.display_no_db(edition).to_smolstr()).unwrap_or_default();
let name = module.name(db).map(|it| it.symbol().clone()).unwrap_or_else(|| sym::underscore);
match module.declaration_source(db) {
Some(InFile { value, file_id }) => {
orig_range_with_focus(db, file_id, value.syntax(), value.name()).map(
@ -199,7 +193,8 @@ impl NavigationTarget {
InFile { file_id, value }: InFile<&dyn ast::HasName>,
kind: SymbolKind,
) -> UpmappingResult<NavigationTarget> {
let name: SmolStr = value.name().map(|it| it.text().into()).unwrap_or_else(|| "_".into());
let name =
value.name().map(|it| Symbol::intern(&it.text())).unwrap_or_else(|| sym::underscore);
orig_range_with_focus(db, file_id, value.syntax(), value.name()).map(
|(FileRange { file_id, range: full_range }, focus_range)| {
@ -210,7 +205,7 @@ impl NavigationTarget {
pub(crate) fn from_syntax(
file_id: FileId,
name: SmolStr,
name: Symbol,
focus_range: Option<TextRange>,
full_range: TextRange,
kind: SymbolKind,
@ -235,8 +230,6 @@ impl TryToNav for FileSymbol {
sema: &Semantics<'_, RootDatabase>,
) -> Option<UpmappingResult<NavigationTarget>> {
let db = sema.db;
let edition =
self.def.module(db).map(|it| it.krate().edition(db)).unwrap_or(Edition::CURRENT);
let display_target = self.def.krate(db).to_display_target(db);
Some(
orig_range_with_focus_r(
@ -248,11 +241,12 @@ impl TryToNav for FileSymbol {
.map(|(FileRange { file_id, range: full_range }, focus_range)| {
NavigationTarget {
file_id,
name: self.is_alias.then(|| self.def.name(db)).flatten().map_or_else(
|| self.name.as_str().into(),
|it| it.display_no_db(edition).to_smolstr(),
),
alias: self.is_alias.then(|| self.name.as_str().into()),
name: self
.is_alias
.then(|| self.def.name(db))
.flatten()
.map_or_else(|| self.name.clone(), |it| it.symbol().clone()),
alias: self.is_alias.then(|| self.name.clone()),
kind: Some(self.def.into()),
full_range,
focus_range,
@ -349,52 +343,50 @@ impl TryToNav for hir::ModuleDef {
pub(crate) trait ToNavFromAst: Sized {
const KIND: SymbolKind;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
_ = db;
None
}
}
fn container_name(db: &RootDatabase, t: impl HasContainer, edition: Edition) -> Option<SmolStr> {
fn container_name(db: &RootDatabase, t: impl HasContainer) -> Option<Symbol> {
match t.container(db) {
hir::ItemContainer::Trait(it) => Some(it.name(db).display_no_db(edition).to_smolstr()),
hir::ItemContainer::Trait(it) => Some(it.name(db).symbol().clone()),
// FIXME: Handle owners of blocks correctly here
hir::ItemContainer::Module(it) => {
it.name(db).map(|name| name.display_no_db(edition).to_smolstr())
}
hir::ItemContainer::Module(it) => it.name(db).map(|name| name.symbol().clone()),
_ => None,
}
}
impl ToNavFromAst for hir::Function {
const KIND: SymbolKind = SymbolKind::Function;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::Const {
const KIND: SymbolKind = SymbolKind::Const;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::Static {
const KIND: SymbolKind = SymbolKind::Static;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::Struct {
const KIND: SymbolKind = SymbolKind::Struct;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::Enum {
const KIND: SymbolKind = SymbolKind::Enum;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::Variant {
@ -402,20 +394,20 @@ impl ToNavFromAst for hir::Variant {
}
impl ToNavFromAst for hir::Union {
const KIND: SymbolKind = SymbolKind::Union;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::TypeAlias {
const KIND: SymbolKind = SymbolKind::TypeAlias;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
impl ToNavFromAst for hir::Trait {
const KIND: SymbolKind = SymbolKind::Trait;
fn container_name(self, db: &RootDatabase) -> Option<SmolStr> {
container_name(db, self, self.krate(db).edition(db))
fn container_name(self, db: &RootDatabase) -> Option<Symbol> {
container_name(db, self)
}
}
@ -451,10 +443,8 @@ where
impl ToNav for hir::Module {
fn to_nav(&self, db: &RootDatabase) -> UpmappingResult<NavigationTarget> {
let InFile { file_id, value } = self.definition_source(db);
let edition = self.krate(db).edition(db);
let name =
self.name(db).map(|it| it.display_no_db(edition).to_smolstr()).unwrap_or_default();
let name = self.name(db).map(|it| it.symbol().clone()).unwrap_or_else(|| sym::underscore);
let (syntax, focus) = match &value {
ModuleSource::SourceFile(node) => (node.syntax(), None),
ModuleSource::Module(node) => (node.syntax(), node.name()),
@ -499,7 +489,7 @@ impl TryToNav for hir::Impl {
|(FileRange { file_id, range: full_range }, focus_range)| {
NavigationTarget::from_syntax(
file_id,
"impl".into(),
sym::kw_impl,
focus_range,
full_range,
SymbolKind::Impl,
@ -521,16 +511,12 @@ impl TryToNav for hir::ExternCrateDecl {
.rename()
.map_or_else(|| value.name_ref().map(Either::Left), |it| it.name().map(Either::Right));
let krate = self.module(db).krate();
let edition = krate.edition(db);
Some(orig_range_with_focus(db, file_id, value.syntax(), focus).map(
|(FileRange { file_id, range: full_range }, focus_range)| {
let mut res = NavigationTarget::from_syntax(
file_id,
self.alias_or_name(db)
.unwrap_or_else(|| self.name(db))
.display_no_db(edition)
.to_smolstr(),
self.alias_or_name(db).unwrap_or_else(|| self.name(db)).symbol().clone(),
focus_range,
full_range,
SymbolKind::Module,
@ -538,7 +524,7 @@ impl TryToNav for hir::ExternCrateDecl {
res.docs = self.docs(db);
res.description = Some(self.display(db, krate.to_display_target(db)).to_string());
res.container_name = container_name(db, *self, edition);
res.container_name = container_name(db, *self);
res
},
))
@ -570,7 +556,7 @@ impl TryToNav for hir::Field {
|(FileRange { file_id, range: full_range }, focus_range)| {
NavigationTarget::from_syntax(
file_id,
format_smolstr!("{}", self.index()),
Symbol::integer(self.index()),
focus_range,
full_range,
SymbolKind::Field,
@ -655,11 +641,10 @@ impl ToNav for LocalSource {
Either::Left(bind_pat) => (bind_pat.syntax(), bind_pat.name()),
Either::Right(it) => (it.syntax(), it.name()),
};
let edition = self.local.parent(db).module(db).krate().edition(db);
orig_range_with_focus(db, file_id, node, name).map(
|(FileRange { file_id, range: full_range }, focus_range)| {
let name = local.name(db).display_no_db(edition).to_smolstr();
let name = local.name(db).symbol().clone();
let kind = if local.is_self(db) {
SymbolKind::SelfParam
} else if local.is_param(db) {
@ -696,8 +681,7 @@ impl TryToNav for hir::Label {
) -> Option<UpmappingResult<NavigationTarget>> {
let db = sema.db;
let InFile { file_id, value } = self.source(db)?;
// Labels can't be keywords, so no escaping needed.
let name = self.name(db).display_no_db(Edition::Edition2015).to_smolstr();
let name = self.name(db).symbol().clone();
Some(orig_range_with_focus(db, file_id, value.syntax(), value.lifetime()).map(
|(FileRange { file_id, range: full_range }, focus_range)| NavigationTarget {
@ -722,8 +706,7 @@ impl TryToNav for hir::TypeParam {
) -> Option<UpmappingResult<NavigationTarget>> {
let db = sema.db;
let InFile { file_id, value } = self.merge().source(db)?;
let edition = self.module(db).krate().edition(db);
let name = self.name(db).display_no_db(edition).to_smolstr();
let name = self.name(db).symbol().clone();
let value = match value {
Either::Left(ast::TypeOrConstParam::Type(x)) => Either::Left(x),
@ -772,8 +755,7 @@ impl TryToNav for hir::LifetimeParam {
) -> Option<UpmappingResult<NavigationTarget>> {
let db = sema.db;
let InFile { file_id, value } = self.source(db)?;
// Lifetimes cannot be keywords, so not escaping needed.
let name = self.name(db).display_no_db(Edition::Edition2015).to_smolstr();
let name = self.name(db).symbol().clone();
Some(orig_range(db, file_id, value.syntax()).map(
|(FileRange { file_id, range: full_range }, focus_range)| NavigationTarget {
@ -798,8 +780,7 @@ impl TryToNav for hir::ConstParam {
) -> Option<UpmappingResult<NavigationTarget>> {
let db = sema.db;
let InFile { file_id, value } = self.merge().source(db)?;
let edition = self.module(db).krate().edition(db);
let name = self.name(db).display_no_db(edition).to_smolstr();
let name = self.name(db).symbol().clone();
let value = match value {
Either::Left(ast::TypeOrConstParam::Const(x)) => x,
@ -834,21 +815,17 @@ impl TryToNav for hir::InlineAsmOperand {
let InFile { file_id, value } = &self.source(db)?;
let file_id = *file_id;
Some(orig_range_with_focus(db, file_id, value.syntax(), value.name()).map(
|(FileRange { file_id, range: full_range }, focus_range)| {
let edition = self.parent(db).module(db).krate().edition(db);
NavigationTarget {
file_id,
name: self
.name(db)
.map_or_else(|| "_".into(), |it| it.display(db, edition).to_smolstr()),
alias: None,
kind: Some(SymbolKind::Local),
full_range,
focus_range,
container_name: None,
description: None,
docs: None,
}
|(FileRange { file_id, range: full_range }, focus_range)| NavigationTarget {
file_id,
name:
self.name(db).map_or_else(|| sym::underscore.clone(), |it| it.symbol().clone()),
alias: None,
kind: Some(SymbolKind::Local),
full_range,
focus_range,
container_name: None,
description: None,
docs: None,
},
))
}

View file

@ -1058,7 +1058,7 @@ use self$0;
use self$0;
"#,
expect![[r#"
Module FileId(0) 0..10
_ Module FileId(0) 0..10
FileId(0) 4..8 import
"#]],
@ -3130,7 +3130,7 @@ fn foo<'r#fn>(s: &'r#fn str) {
}
"#,
expect![[r#"
'r#break Label FileId(0) 87..96 87..95
'break Label FileId(0) 87..96 87..95
FileId(0) 113..121
"#]],
@ -3146,7 +3146,7 @@ fn foo<'r#fn$0>(s: &'r#fn str) {
}
"#,
expect![[r#"
'r#fn LifetimeParam FileId(0) 7..12
'fn LifetimeParam FileId(0) 7..12
FileId(0) 18..23
FileId(0) 44..49

View file

@ -231,7 +231,7 @@ fn cmp_runnables(
.cmp(&nav_b.focus_range.map_or_else(t_0, |it| it.start()))
})
.then_with(|| kind.disc().cmp(&kind_b.disc()))
.then_with(|| nav.name.cmp(&nav_b.name))
.then_with(|| nav.name.as_str().cmp(nav_b.name.as_str()))
}
fn find_related_tests(
@ -817,7 +817,7 @@ mod not_a_root {
"#,
expect![[r#"
[
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..331, name: \"\", kind: Module })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..331, name: \"_\", kind: Module })",
"(Bin, NavigationTarget { file_id: FileId(0), full_range: 1..13, focus_range: 4..8, name: \"main\", kind: Function })",
"(Bin, NavigationTarget { file_id: FileId(0), full_range: 15..76, focus_range: 42..71, name: \"__cortex_m_rt_main_trampoline\", kind: Function })",
"(Bin, NavigationTarget { file_id: FileId(0), full_range: 78..154, focus_range: 113..149, name: \"__cortex_m_rt_main_trampoline_unsafe\", kind: Function })",
@ -1138,7 +1138,7 @@ fn test_foo1() {}
"#,
expect![[r#"
[
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..51, name: \"\", kind: Module })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..51, name: \"_\", kind: Module })",
"(Test, NavigationTarget { file_id: FileId(0), full_range: 1..50, focus_range: 36..45, name: \"test_foo1\", kind: Function }, Atom(KeyValue { key: \"feature\", value: \"foo\" }))",
]
"#]],
@ -1157,7 +1157,7 @@ fn test_foo1() {}
"#,
expect![[r#"
[
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..73, name: \"\", kind: Module })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..73, name: \"_\", kind: Module })",
"(Test, NavigationTarget { file_id: FileId(0), full_range: 1..72, focus_range: 58..67, name: \"test_foo1\", kind: Function }, All([Atom(KeyValue { key: \"feature\", value: \"foo\" }), Atom(KeyValue { key: \"feature\", value: \"bar\" })]))",
]
"#]],
@ -1236,7 +1236,7 @@ generate_main!();
"#,
expect![[r#"
[
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..345, name: \"\", kind: Module })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 0..345, name: \"_\", kind: Module })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 282..312, focus_range: 286..291, name: \"tests\", kind: Module, description: \"mod tests\" })",
"(Test, NavigationTarget { file_id: FileId(0), full_range: 298..307, name: \"foo_test\", kind: Function })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 313..323, name: \"tests2\", kind: Module, description: \"mod tests2\" }, true)",
@ -1679,10 +1679,10 @@ mod r#mod {
"#,
expect![[r#"
[
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 1..461, focus_range: 5..10, name: \"r#mod\", kind: Module, description: \"mod r#mod\" })",
"(TestMod, NavigationTarget { file_id: FileId(0), full_range: 1..461, focus_range: 5..10, name: \"mod\", kind: Module, description: \"mod r#mod\" })",
"(Test, NavigationTarget { file_id: FileId(0), full_range: 17..41, focus_range: 32..36, name: \"r#fn\", kind: Function })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 47..84, name: \"r#for\", container_name: \"r#mod\" })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 90..146, name: \"r#struct\", container_name: \"r#mod\" })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 47..84, name: \"r#for\", container_name: \"mod\" })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 90..146, name: \"r#struct\", container_name: \"mod\" })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 152..266, focus_range: 189..205, name: \"impl\", kind: Impl })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 216..260, name: \"r#fn\" })",
"(DocTest, NavigationTarget { file_id: FileId(0), full_range: 323..367, name: \"r#fn\" })",

View file

@ -59,11 +59,9 @@ mod tests {
use expect_test::expect;
use ide_assists::{Assist, AssistResolveStrategy};
use ide_db::{
FileRange, FxHashSet, RootDatabase, base_db::salsa::Durability,
symbol_index::SymbolsDatabase,
FileRange, FxHashSet, RootDatabase, base_db::salsa::Setter as _, symbol_index::LocalRoots,
};
use test_fixture::WithFixture;
use triomphe::Arc;
use super::ssr_assists;
@ -74,7 +72,7 @@ mod tests {
let (mut db, file_id, range_or_offset) = RootDatabase::with_range_or_offset(ra_fixture);
let mut local_roots = FxHashSet::default();
local_roots.insert(test_fixture::WORKSPACE);
db.set_local_roots_with_durability(Arc::new(local_roots), Durability::HIGH);
LocalRoots::get(&db).set_roots(&mut db).to(local_roots);
ssr_assists(
&db,
&resolve,

View file

@ -85,6 +85,7 @@ define_symbols! {
false_ = "false",
let_ = "let",
const_ = "const",
kw_impl = "impl",
proc_dash_macro = "proc-macro",
aapcs_dash_unwind = "aapcs-unwind",
avr_dash_interrupt = "avr-interrupt",

View file

@ -54,12 +54,13 @@ pub(super) fn impl_(p: &mut Parser<'_>, m: Marker) {
// impl const Send for S {}
p.eat(T![const]);
// FIXME: never type
// test impl_item_never_type
// impl ! {}
// test impl_item_neg
// impl !Send for S {}
p.eat(T![!]);
if p.at(T![!]) && !p.nth_at(1, T!['{']) {
// test impl_item_neg
// impl !Send for S {}
p.eat(T![!]);
}
impl_type(p);
if p.eat(T![for]) {
impl_type(p);

View file

@ -322,6 +322,10 @@ mod ok {
#[test]
fn impl_item_neg() { run_and_expect_no_errors("test_data/parser/inline/ok/impl_item_neg.rs"); }
#[test]
fn impl_item_never_type() {
run_and_expect_no_errors("test_data/parser/inline/ok/impl_item_never_type.rs");
}
#[test]
fn impl_trait_type() {
run_and_expect_no_errors("test_data/parser/inline/ok/impl_trait_type.rs");
}

View file

@ -0,0 +1,11 @@
SOURCE_FILE
IMPL
IMPL_KW "impl"
WHITESPACE " "
NEVER_TYPE
BANG "!"
WHITESPACE " "
ASSOC_ITEM_LIST
L_CURLY "{"
R_CURLY "}"
WHITESPACE "\n"

View file

@ -0,0 +1 @@
impl ! {}

View file

@ -50,7 +50,6 @@ impl flags::Search {
/// for much else.
pub fn run(self) -> anyhow::Result<()> {
use ide_db::base_db::SourceDatabase;
use ide_db::symbol_index::SymbolsDatabase;
let cargo_config =
CargoConfig { all_targets: true, set_test: true, ..CargoConfig::default() };
let load_cargo_config = LoadCargoConfig {
@ -69,7 +68,7 @@ impl flags::Search {
match_finder.add_search_pattern(pattern)?;
}
if let Some(debug_snippet) = &self.debug {
for &root in db.local_roots().iter() {
for &root in ide_db::symbol_index::LocalRoots::get(db).roots(db).iter() {
let sr = db.source_root(root).source_root(db);
for file_id in sr.iter() {
for debug_info in match_finder.debug_where_text_equal(

View file

@ -62,8 +62,8 @@ impl LineEndings {
// Account for removed `\r`.
// After `set_len`, `buf` is guaranteed to contain utf-8 again.
let new_len = buf.len() - gap_len;
let src = unsafe {
let new_len = buf.len() - gap_len;
buf.set_len(new_len);
String::from_utf8_unchecked(buf)
};

View file

@ -117,7 +117,7 @@ impl<A: ?Sized + Downcast> Map<A> {
#[inline]
#[must_use]
pub fn get<T: IntoBox<A>>(&self) -> Option<&T> {
self.raw.get(&TypeId::of::<T>()).map(|any| unsafe { any.downcast_ref_unchecked::<T>() })
self.raw.get(&TypeId::of::<T>()).map(|any| unsafe { any.downcast_unchecked_ref::<T>() })
}
/// Gets the entry for the given type in the collection for in-place manipulation
@ -172,7 +172,7 @@ impl<'map, A: ?Sized + Downcast, V: IntoBox<A>> OccupiedEntry<'map, A, V> {
#[inline]
#[must_use]
pub fn into_mut(self) -> &'map mut V {
unsafe { self.inner.into_mut().downcast_mut_unchecked() }
unsafe { self.inner.into_mut().downcast_unchecked_mut() }
}
}
@ -181,7 +181,7 @@ impl<'map, A: ?Sized + Downcast, V: IntoBox<A>> VacantEntry<'map, A, V> {
/// and returns a mutable reference to it
#[inline]
pub fn insert(self, value: V) -> &'map mut V {
unsafe { self.inner.insert(value.into_box()).downcast_mut_unchecked() }
unsafe { self.inner.insert(value.into_box()).downcast_unchecked_mut() }
}
}
@ -244,14 +244,14 @@ pub trait Downcast {
/// # Safety
///
/// The caller must ensure that `T` matches the trait object, on pain of *undefined behavior*.
unsafe fn downcast_ref_unchecked<T: 'static>(&self) -> &T;
unsafe fn downcast_unchecked_ref<T: 'static>(&self) -> &T;
/// Downcast from `&mut Any` to `&mut T`, without checking the type matches.
///
/// # Safety
///
/// The caller must ensure that `T` matches the trait object, on pain of *undefined behavior*.
unsafe fn downcast_mut_unchecked<T: 'static>(&mut self) -> &mut T;
unsafe fn downcast_unchecked_mut<T: 'static>(&mut self) -> &mut T;
}
/// A trait for the conversion of an object into a boxed trait object.
@ -269,12 +269,12 @@ macro_rules! implement {
}
#[inline]
unsafe fn downcast_ref_unchecked<T: 'static>(&self) -> &T {
unsafe fn downcast_unchecked_ref<T: 'static>(&self) -> &T {
unsafe { &*std::ptr::from_ref::<Self>(self).cast::<T>() }
}
#[inline]
unsafe fn downcast_mut_unchecked<T: 'static>(&mut self) -> &mut T {
unsafe fn downcast_unchecked_mut<T: 'static>(&mut self) -> &mut T {
unsafe { &mut *std::ptr::from_mut::<Self>(self).cast::<T>() }
}
}

View file

@ -1,4 +1,4 @@
rust-analyzer's testing is based on *snapshot tests*: a test is a piece of input text, usually a Rust code, and some output text. There is then some testing helper that runs the feature on the input text and compares the result to the output text.
rust-analyzer's testing is based on *snapshot tests*: a test is a piece of input text, usually Rust code, and some output text. There is then some testing helper that runs the feature on the input text and compares the result to the output text.
rust-analyzer uses a combination of the crate [`expect-test`](https://docs.rs/expect-test) and a custom testing framework.
@ -20,7 +20,7 @@ fn main() {
}
```
Sometimes we want to check more that there are no type mismatches. For that we use other helpers. For example, often we want to assert that the type of some expression is some specific type. For that we use the `check_types()` function. It takes a Rust code string with custom annotation, that are common in our test suite. The general scheme of annotation is:
Sometimes we want to check more than that there are no type mismatches. For that we use other helpers. For example, often we want to assert that the type of some expression is some specific type; for that we use the `check_types()` function. It takes a Rust code string with custom annotations, which are common in our test suite. The general scheme of annotations is:
- `$0` marks a position. What to do with it is determined by the testing helper. Commonly it denotes the cursor position in IDE tests (for example, hover).
- `$0...$0` marks a range, commonly a selection in IDE tests.
@ -31,7 +31,7 @@ Sometimes we want to check more that there are no type mismatches. For that we u
// ^^^^ hey
```
`check_types()` uses labels to assert type: when you attach a label to a range, `check_types()` assert that the type of this range will be what written in the label.
`check_types()` uses labels to assert types: when you attach a label to a range, `check_types()` asserts that the type of this range will be what is written in the label.
It's all too abstract without an example:
```rust
@ -67,9 +67,9 @@ fn main() {
);
}
```
The text inside the `expect![[]]` is determined by the helper, `check_infer()` in this case. For `check_infer()`, each line is a range in the source code (the range is counted in bytes and the source is trimmed, indentation is stripped), next to it there is the text in that range, or some part of it with `...` if it's too long, and finally comes the type of that range.
The text inside the `expect![[]]` is determined by the helper, `check_infer()` in this case. For `check_infer()`, each line is a range in the source code (the range is counted in bytes and the source is trimmed, so indentation is stripped); next to it there is the text in that range, or some part of it with `...` if it's too long, and finally comes the type of that range.
The important feature of `expect-test` is that it allows easy update of the expectation. Say you changed something in the code, maybe fixed a bug, and the output in `expect![[]]` needs to change. Or maybe you are writing it from scratch. Writing it by hand is very tedious and prone to mistakes. But `expect-trait` has a magic. You can set the environment variable `UPDATE_EXPECT=1`, then run the test, and it will update automatically! Some editors (e.g. VSCode) make it even more convenient: on them, on the top of every test that uses `expect-test`, next to the usual `Run | Debug` buttons, rust-analyzer also shows an `Update Expect` button. Clicking it will run that test in updating mode.
The important feature of `expect-test` is that it allows easy update of the expectation. Say you changed something in the code, maybe fixed a bug, and the output in `expect![[]]` needs to change. Or maybe you are writing it from scratch. Writing it by hand is very tedious and prone to mistakes. But `expect-test` has some magic. You can set the environment variable `UPDATE_EXPECT=1`, then run the test, and it will update automatically! Some editors (e.g. VSCode) make it even more convenient: on them, at the top of every test that uses `expect-test`, next to the usual `Run | Debug` buttons, rust-analyzer also shows an `Update Expect` button. Clicking it will run that test in updating mode.
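For instance, running `UPDATE_EXPECT=1 cargo test -p ide` from the repository root (the package filter here is only an example) reruns those tests and rewrites their `expect![[]]` blocks in place.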
## Rust code in the tests
@ -77,13 +77,13 @@ The first thing that you probably already noticed is that the Rust code in the t
The syntax highlighting is very important, not just because it's easy on the eyes: it's very easy to make mistakes in test code, and debugging them can be very hard. Often the test will just fail, printing an `{unknown}` type, and you'll have no clue what's going wrong. The syntax highlighting is the clue; if something isn't highlighted correctly, that probably means there is an error (there is one exception to this, which we'll discuss later). You can even set the semantic highlighting tag `unresolved_reference` to e.g. red, so you will see such things clearly.
Still, often you won't know what's going wrong. Why you can't fix the test, or worse, you expect it to fail but it doesn't. You can try the code on a real IDE to be sure it works. Later we'll give some tips to fix the test.
Still, often you won't know what's going wrong: why you can't fix the test, or worse, why it doesn't fail when you expect it to. You can try the code in a real IDE to be sure it works. Later we'll give some tips for fixing the test.
### The fixture
The Rust code in a test is not, a fact, a single Rust file. It has a mini-language that allows you to express multiple files, multiple crates, different configs, and more. All options are documented in `crates/test-utils/src/fixture.rs`, but here are some of the common ones:
The Rust code in a test is not, in fact, a single Rust file. It uses a mini-language that allows you to express multiple files, multiple crates, different configs, and more. All options are documented in `crates/test-utils/src/fixture.rs`, but here are some of the common ones:
- `//- minicore: flag1, flag2, ...`. This is by far the most common flag. Tests in rust-analyzer don't have access by default to any other type - not `Option`, not `Iterator`, not even `Sized`. This flag allows you to include parts of the `crates/test-utils/src/minicore.rs` file, which mimics `core`. All possible flags are listed at the top of `minicore` along with the flags they imply, then later you can see by `// region:flag` and `// endregion:flag` what code each flag enables.
- `//- minicore: flag1, flag2, ...`. This is by far the most common option. Tests in rust-analyzer don't have access by default to any other type - not `Option`, not `Iterator`, not even `Sized`. This option allows you to include parts of the `crates/test-utils/src/minicore.rs` file, which mimics `core`. All possible flags are listed at the top of `minicore` along with the flags they imply, then later you can see by `// region:flag` and `// endregion:flag` what code each flag enables.
- `// /path/to/file.rs crate:crate deps:dep_a,dep_b`. The first component is the filename of the code that follows (until the next file); it is required whenever this line is present. The other components in this line are optional. They include `crate:crate_name`, to start a new crate, or `deps:dep_a,dep_b`, to declare dependencies between crates. You can also declare modules as usual in Rust - just name your paths `/foo.rs` or `/foo/mod.rs`, declare `mod foo` and that's it!
So the following snippet:
@ -96,11 +96,11 @@ pub struct Bar;
// /main.rs crate:main deps:foo
use foo::Bar;
```
Declares two crates `foo` and `main` where `main` depends on `foo`, with dependency in `Sized` and the `FnX` traits from `core`, and a module of `foo` called `bar`.
declares two crates `foo` and `main`, where `main` depends on `foo`, with dependencies on the `Sized` and `FnX` traits from `core`, and a module of `foo` called `bar`.
And as promised, here are some tips to make your test work:
And as promised, here are some tips for making your test work:
- If you use some type/trait, you must *always* include it in `minicore`. Note - not all types from core/std are available there, you can add new (under flags) if you need. And import them if they are not in the prelude.
- If you use some type/trait, you must *always* include it in `minicore`. Note - not all types from core/std are available there, but you can add new ones (under flags) if you need. And import them if they are not in the prelude.
- If you use unsized types (`dyn Trait`/slices), you may want to include some or all of the following `minicore` flags: `sized`, `unsize`, `coerce_unsized`, `dispatch_from_dyn`.
- If you use closures, consider including the `fn` minicore flag (see the sketch after this list). Async closures need the `async_fn` flag.
- `sized` is commonly needed, consider adding it if you're stuck.
- `sized` is commonly needed, so consider adding it if you're stuck.
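To make the closure tip concrete, here is a minimal fixture of the kind a `check_*` helper accepts; the `fn` flag pulls the `Fn*` traits in from `minicore`, and the code itself is illustrative:
```rust
//- minicore: sized, fn
fn apply(f: impl Fn(i32) -> i32, x: i32) -> i32 { f(x) }

fn main() {
    let y = apply(|x| x + 1, 1);
      //^ i32
}
```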

38
lib/smol_str/CHANGELOG.md Normal file
View file

@ -0,0 +1,38 @@
# Changelog
## Unreleased
## 0.3.4 - 2025-10-23
- Added `rust-version` field to `Cargo.toml`
## 0.3.3 - 2025-10-23
- Optimise `StrExt::to_ascii_lowercase_smolstr`, `StrExt::to_ascii_uppercase_smolstr`
~2x speedup inline, ~4-22x for heap.
- Optimise `StrExt::to_lowercase_smolstr`, `StrExt::to_uppercase_smolstr` ~2x speedup inline, ~5-50x for heap.
- Optimise `StrExt::replace_smolstr`, `StrExt::replacen_smolstr` for single ascii replace,
~3x speedup inline & heap.
## 0.3.2 - 2024-10-23
- Fix `SmolStrBuilder::push` incorrectly padding null bytes when spilling onto the heap on a
multibyte character push
## 0.3.1 - 2024-09-04
- Fix `SmolStrBuilder` leaking implementation details
## 0.3.0 - 2024-09-04
- Remove deprecated `SmolStr::new_inline_from_ascii` function
- Remove `SmolStr::to_string` in favor of `ToString::to_string`
- Add `impl AsRef<[u8]> for SmolStr` impl
- Add `impl AsRef<OsStr> for SmolStr` impl
- Add `impl AsRef<Path> for SmolStr` impl
- Add `SmolStrBuilder`
## 0.2.2 - 2024-05-14
- Add `StrExt` trait providing `to_lowercase_smolstr`, `replace_smolstr` and similar
- Add `PartialEq` optimization for `ptr_eq`-able representations

37
lib/smol_str/Cargo.toml Normal file
View file

@ -0,0 +1,37 @@
[package]
name = "smol_str"
version = "0.3.4"
description = "small-string optimized string type with O(1) clone"
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-lang/rust-analyzer/tree/master/lib/smol_str"
authors = ["Aleksey Kladov <aleksey.kladov@gmail.com>", "Lukas Wirth <lukastw97@gmail.com>"]
edition = "2024"
rust-version = "1.89"
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
all-features = true
[dependencies]
serde_core = { version = "1.0.220", optional = true, default-features = false }
borsh = { version = "1.4.0", optional = true, default-features = false }
arbitrary = { version = "1.3", optional = true }
[dev-dependencies]
proptest = "1.5"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
criterion = "0.7"
rand = "0.9.2"
[features]
default = ["std"]
std = ["serde_core?/std", "borsh?/std"]
serde = ["dep:serde_core"]
[[bench]]
name = "bench"
harness = false
[lints]
workspace = true

201
lib/smol_str/LICENSE-APACHE Normal file
View file

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

23
lib/smol_str/LICENSE-MIT Normal file
View file

@ -0,0 +1,23 @@
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

35
lib/smol_str/README.md Normal file
View file

@ -0,0 +1,35 @@
# smol_str
[![CI](https://github.com/rust-analyzer/smol_str/workflows/CI/badge.svg)](https://github.com/rust-analyzer/smol_str/actions?query=branch%3Amaster+workflow%3ACI)
[![Crates.io](https://img.shields.io/crates/v/smol_str.svg)](https://crates.io/crates/smol_str)
[![API reference](https://docs.rs/smol_str/badge.svg)](https://docs.rs/smol_str/)
A `SmolStr` is a string type that has the following properties:
* `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)
* `Clone` is `O(1)`
* Strings are stack-allocated if they are:
* Up to 23 bytes long
* Longer than 23 bytes, but substrings of `WS` (see `src/lib.rs`). Such strings consist
solely of consecutive newlines, followed by consecutive spaces
* If a string does not satisfy the aforementioned conditions, it is heap-allocated
* Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
Unlike `String`, however, `SmolStr` is immutable. The primary use case for
`SmolStr` is a good enough default storage for tokens of typical programming
languages. Strings consisting of a series of newlines followed by a series of spaces are a typical pattern in computer programs because of indentation.
Note that a specialized interner might be a better solution for some use cases.
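A minimal usage sketch of the properties above (the string contents are arbitrary):

```rust
use smol_str::SmolStr;

fn main() {
    // 23 bytes or fewer: stored inline on the stack, no heap allocation.
    let short = SmolStr::new("hello");
    // `Clone` is O(1) for every representation.
    let cloned = short.clone();
    assert_eq!(short, cloned);

    // A `&'static str` can be wrapped without allocating, regardless of length.
    let long = SmolStr::new_static("a string that is comfortably longer than twenty-three bytes");
    assert!(long.len() > 23);
}
```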
## Benchmarks
Run criterion benches with
```sh
cargo bench --bench \* -- --quick
```
## MSRV Policy
Minimal Supported Rust Version: latest stable.
Bumping MSRV is not considered a semver-breaking change.

View file

@ -0,0 +1,118 @@
use criterion::{Criterion, criterion_group, criterion_main};
use rand::distr::{Alphanumeric, SampleString};
use smol_str::{SmolStr, StrExt, ToSmolStr, format_smolstr};
use std::hint::black_box;
/// 12: small (inline)
/// 50: medium (heap)
/// 1000: large (heap)
const TEST_LENS: [usize; 3] = [12, 50, 1000];
fn format_bench(c: &mut Criterion) {
for len in TEST_LENS {
let n = rand::random_range(10000..99999);
let str_len = len.checked_sub(n.to_smolstr().len()).unwrap();
let str = Alphanumeric.sample_string(&mut rand::rng(), str_len);
c.bench_function(&format!("format_smolstr! len={len}"), |b| {
let mut v = <_>::default();
b.iter(|| v = format_smolstr!("{str}-{n}"));
assert_eq!(v, format!("{str}-{n}"));
});
}
}
fn from_str_bench(c: &mut Criterion) {
for len in TEST_LENS {
let str = Alphanumeric.sample_string(&mut rand::rng(), len);
c.bench_function(&format!("SmolStr::from len={len}"), |b| {
let mut v = <_>::default();
b.iter(|| v = SmolStr::from(black_box(&str)));
assert_eq!(v, str);
});
}
}
fn clone_bench(c: &mut Criterion) {
for len in TEST_LENS {
let str = Alphanumeric.sample_string(&mut rand::rng(), len);
let smolstr = SmolStr::new(&str);
c.bench_function(&format!("SmolStr::clone len={len}"), |b| {
let mut v = <_>::default();
b.iter(|| v = smolstr.clone());
assert_eq!(v, str);
});
}
}
fn eq_bench(c: &mut Criterion) {
for len in TEST_LENS {
let str = Alphanumeric.sample_string(&mut rand::rng(), len);
let smolstr = SmolStr::new(&str);
c.bench_function(&format!("SmolStr::eq len={len}"), |b| {
let mut v = false;
b.iter(|| v = smolstr == black_box(&str));
assert!(v);
});
}
}
fn to_lowercase_bench(c: &mut Criterion) {
const END_CHAR: char = 'İ';
for len in TEST_LENS {
// mostly ascii seq with some non-ascii at the end
let mut str = Alphanumeric.sample_string(&mut rand::rng(), len - END_CHAR.len_utf8());
str.push(END_CHAR);
let str = str.as_str();
c.bench_function(&format!("to_lowercase_smolstr len={len}"), |b| {
let mut v = <_>::default();
b.iter(|| v = str.to_lowercase_smolstr());
assert_eq!(v, str.to_lowercase());
});
}
}
fn to_ascii_lowercase_bench(c: &mut Criterion) {
for len in TEST_LENS {
let str = Alphanumeric.sample_string(&mut rand::rng(), len);
let str = str.as_str();
c.bench_function(&format!("to_ascii_lowercase_smolstr len={len}"), |b| {
let mut v = <_>::default();
b.iter(|| v = str.to_ascii_lowercase_smolstr());
assert_eq!(v, str.to_ascii_lowercase());
});
}
}
fn replace_bench(c: &mut Criterion) {
for len in TEST_LENS {
let s_dash_s = Alphanumeric.sample_string(&mut rand::rng(), len / 2)
+ "-"
+ &Alphanumeric.sample_string(&mut rand::rng(), len - 1 - len / 2);
let str = s_dash_s.as_str();
c.bench_function(&format!("replace_smolstr len={len}"), |b| {
let mut v = <_>::default();
b.iter(|| v = str.replace_smolstr("-", "_"));
assert_eq!(v, str.replace("-", "_"));
});
}
}
criterion_group!(
benches,
format_bench,
from_str_bench,
clone_bench,
eq_bench,
to_lowercase_bench,
to_ascii_lowercase_bench,
replace_bench,
);
criterion_main!(benches);

40
lib/smol_str/src/borsh.rs Normal file

@ -0,0 +1,40 @@
use crate::{INLINE_CAP, Repr, SmolStr};
use alloc::string::{String, ToString};
use borsh::{
BorshDeserialize, BorshSerialize,
io::{Error, ErrorKind, Read, Write},
};
use core::mem::transmute;
impl BorshSerialize for SmolStr {
fn serialize<W: Write>(&self, writer: &mut W) -> borsh::io::Result<()> {
self.as_str().serialize(writer)
}
}
impl BorshDeserialize for SmolStr {
#[inline]
fn deserialize_reader<R: Read>(reader: &mut R) -> borsh::io::Result<Self> {
let len = u32::deserialize_reader(reader)?;
if (len as usize) < INLINE_CAP {
let mut buf = [0u8; INLINE_CAP];
reader.read_exact(&mut buf[..len as usize])?;
_ = core::str::from_utf8(&buf[..len as usize]).map_err(|err| {
let msg = err.to_string();
Error::new(ErrorKind::InvalidData, msg)
})?;
Ok(SmolStr(Repr::Inline {
len: unsafe { transmute::<u8, crate::InlineSize>(len as u8) },
buf,
}))
} else {
// u8::vec_from_reader always returns Some on success in current implementation
let vec = u8::vec_from_reader(len, reader)?
.ok_or_else(|| Error::other("u8::vec_from_reader unexpectedly returned None"))?;
Ok(SmolStr::from(String::from_utf8(vec).map_err(|err| {
let msg = err.to_string();
Error::new(ErrorKind::InvalidData, msg)
})?))
}
}
}

970
lib/smol_str/src/lib.rs Normal file

@ -0,0 +1,970 @@
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
extern crate alloc;
use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc};
use core::{
borrow::Borrow,
cmp::{self, Ordering},
convert::Infallible,
fmt, hash, iter, mem, ops,
str::FromStr,
};
/// A `SmolStr` is a string type that has the following properties:
///
/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64-bit platforms)
/// * `Clone` is `O(1)`
/// * Strings are stack-allocated if they are:
/// * Up to 23 bytes long
/// * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist
/// solely of consecutive newlines, followed by consecutive spaces
/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
///
/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
/// `SmolStr` is a good enough default storage for tokens of typical programming
/// languages. Strings consisting of a series of newlines followed by a series of
/// spaces are a typical pattern in computer programs because of indentation.
/// Note that a specialized interner might be a better solution for some use cases.
///
/// `WS`: A string of 32 newlines followed by 128 spaces.
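///
/// # Examples
///
/// A minimal usage sketch:
///
/// ```
/// use smol_str::SmolStr;
///
/// let s = SmolStr::new("hello"); // 5 bytes, stored inline
/// assert_eq!(s, "hello");
/// assert!(!s.is_heap_allocated());
/// ```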
pub struct SmolStr(Repr);
impl SmolStr {
/// Constructs an inline variant of `SmolStr`.
///
/// This never allocates.
///
/// # Panics
///
/// Panics if `text.len() > 23`.
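///
/// # Examples
///
/// A small sketch; because this is a `const fn`, it also works in constants:
///
/// ```
/// use smol_str::SmolStr;
///
/// const HELLO: SmolStr = SmolStr::new_inline("hello");
/// assert_eq!(HELLO, "hello");
/// ```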
#[inline]
pub const fn new_inline(text: &str) -> SmolStr {
assert!(text.len() <= INLINE_CAP); // avoids bounds checks in loop
let text = text.as_bytes();
let mut buf = [0; INLINE_CAP];
let mut i = 0;
while i < text.len() {
buf[i] = text[i];
i += 1
}
SmolStr(Repr::Inline {
// SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
// as we asserted it.
len: unsafe { InlineSize::transmute_from_u8(text.len() as u8) },
buf,
})
}
/// Constructs a `SmolStr` from a statically allocated string.
///
/// This never allocates.
#[inline(always)]
pub const fn new_static(text: &'static str) -> SmolStr {
// NOTE: this never uses the inline storage; if a canonical
// representation is needed, we could check for `len() < INLINE_CAP`
// and call `new_inline`, but this would mean an extra branch.
SmolStr(Repr::Static(text))
}
/// Constructs a `SmolStr` from a `str`, heap-allocating if necessary.
#[inline(always)]
pub fn new(text: impl AsRef<str>) -> SmolStr {
SmolStr(Repr::new(text.as_ref()))
}
/// Returns a `&str` slice of this `SmolStr`.
#[inline(always)]
pub fn as_str(&self) -> &str {
self.0.as_str()
}
/// Returns the length of `self` in bytes.
#[inline(always)]
pub fn len(&self) -> usize {
self.0.len()
}
/// Returns `true` if `self` has a length of zero bytes.
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Returns `true` if `self` is heap-allocated.
#[inline(always)]
pub const fn is_heap_allocated(&self) -> bool {
matches!(self.0, Repr::Heap(..))
}
}
impl Clone for SmolStr {
#[inline]
fn clone(&self) -> Self {
if !self.is_heap_allocated() {
// SAFETY: We verified that the payload of `Repr` is a POD
return unsafe { core::ptr::read(self as *const SmolStr) };
}
Self(self.0.clone())
}
}
impl Default for SmolStr {
#[inline(always)]
fn default() -> SmolStr {
SmolStr(Repr::Inline { len: InlineSize::_V0, buf: [0; INLINE_CAP] })
}
}
impl ops::Deref for SmolStr {
type Target = str;
#[inline(always)]
fn deref(&self) -> &str {
self.as_str()
}
}
// region: PartialEq implementations
impl Eq for SmolStr {}
impl PartialEq<SmolStr> for SmolStr {
fn eq(&self, other: &SmolStr) -> bool {
self.0.ptr_eq(&other.0) || self.as_str() == other.as_str()
}
}
impl PartialEq<str> for SmolStr {
#[inline(always)]
fn eq(&self, other: &str) -> bool {
self.as_str() == other
}
}
impl PartialEq<SmolStr> for str {
#[inline(always)]
fn eq(&self, other: &SmolStr) -> bool {
other == self
}
}
impl<'a> PartialEq<&'a str> for SmolStr {
#[inline(always)]
fn eq(&self, other: &&'a str) -> bool {
self == *other
}
}
impl PartialEq<SmolStr> for &str {
#[inline(always)]
fn eq(&self, other: &SmolStr) -> bool {
*self == other
}
}
impl PartialEq<String> for SmolStr {
#[inline(always)]
fn eq(&self, other: &String) -> bool {
self.as_str() == other
}
}
impl PartialEq<SmolStr> for String {
#[inline(always)]
fn eq(&self, other: &SmolStr) -> bool {
other == self
}
}
impl<'a> PartialEq<&'a String> for SmolStr {
#[inline(always)]
fn eq(&self, other: &&'a String) -> bool {
self == *other
}
}
impl PartialEq<SmolStr> for &String {
#[inline(always)]
fn eq(&self, other: &SmolStr) -> bool {
*self == other
}
}
// endregion: PartialEq implementations
impl Ord for SmolStr {
fn cmp(&self, other: &SmolStr) -> Ordering {
self.as_str().cmp(other.as_str())
}
}
impl PartialOrd for SmolStr {
fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl hash::Hash for SmolStr {
fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
self.as_str().hash(hasher);
}
}
impl fmt::Debug for SmolStr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.as_str(), f)
}
}
impl fmt::Display for SmolStr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.as_str(), f)
}
}
impl iter::FromIterator<char> for SmolStr {
fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
from_char_iter(iter.into_iter())
}
}
#[inline]
fn from_char_iter(iter: impl Iterator<Item = char>) -> SmolStr {
from_buf_and_chars([0; _], 0, iter)
}
fn from_buf_and_chars(
mut buf: [u8; INLINE_CAP],
buf_len: usize,
mut iter: impl Iterator<Item = char>,
) -> SmolStr {
let min_size = iter.size_hint().0 + buf_len;
if min_size > INLINE_CAP {
let heap: String =
core::str::from_utf8(&buf[..buf_len]).unwrap().chars().chain(iter).collect();
if heap.len() <= INLINE_CAP {
// size hint lied
return SmolStr::new_inline(&heap);
}
return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
}
let mut len = buf_len;
while let Some(ch) = iter.next() {
let size = ch.len_utf8();
if size + len > INLINE_CAP {
let (min_remaining, _) = iter.size_hint();
let mut heap = String::with_capacity(size + len + min_remaining);
heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
heap.push(ch);
heap.extend(iter);
return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
}
ch.encode_utf8(&mut buf[len..]);
len += size;
}
SmolStr(Repr::Inline {
// SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
// as we otherwise return early.
len: unsafe { InlineSize::transmute_from_u8(len as u8) },
buf,
})
}
fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
where
T: AsRef<str>,
String: iter::Extend<T>,
{
let mut len = 0;
let mut buf = [0u8; INLINE_CAP];
while let Some(slice) = iter.next() {
let slice = slice.as_ref();
let size = slice.len();
if size + len > INLINE_CAP {
let mut heap = String::with_capacity(size + len);
heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
heap.push_str(slice);
heap.extend(iter);
return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
}
buf[len..][..size].copy_from_slice(slice.as_bytes());
len += size;
}
SmolStr(Repr::Inline {
// SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
// as we otherwise return early.
len: unsafe { InlineSize::transmute_from_u8(len as u8) },
buf,
})
}
impl iter::FromIterator<String> for SmolStr {
fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
build_from_str_iter(iter.into_iter())
}
}
impl<'a> iter::FromIterator<&'a String> for SmolStr {
fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
SmolStr::from_iter(iter.into_iter().map(|x| x.as_str()))
}
}
impl<'a> iter::FromIterator<&'a str> for SmolStr {
fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
build_from_str_iter(iter.into_iter())
}
}
impl AsRef<str> for SmolStr {
#[inline(always)]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl AsRef<[u8]> for SmolStr {
#[inline(always)]
fn as_ref(&self) -> &[u8] {
self.as_str().as_bytes()
}
}
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl AsRef<std::ffi::OsStr> for SmolStr {
#[inline(always)]
fn as_ref(&self) -> &std::ffi::OsStr {
AsRef::<std::ffi::OsStr>::as_ref(self.as_str())
}
}
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl AsRef<std::path::Path> for SmolStr {
#[inline(always)]
fn as_ref(&self) -> &std::path::Path {
AsRef::<std::path::Path>::as_ref(self.as_str())
}
}
impl From<&str> for SmolStr {
#[inline]
fn from(s: &str) -> SmolStr {
SmolStr::new(s)
}
}
impl From<&mut str> for SmolStr {
#[inline]
fn from(s: &mut str) -> SmolStr {
SmolStr::new(s)
}
}
impl From<&String> for SmolStr {
#[inline]
fn from(s: &String) -> SmolStr {
SmolStr::new(s)
}
}
impl From<String> for SmolStr {
#[inline(always)]
fn from(text: String) -> Self {
Self::new(text)
}
}
impl From<Box<str>> for SmolStr {
#[inline]
fn from(s: Box<str>) -> SmolStr {
SmolStr::new(s)
}
}
impl From<Arc<str>> for SmolStr {
#[inline]
fn from(s: Arc<str>) -> SmolStr {
let repr = Repr::new_on_stack(s.as_ref()).unwrap_or(Repr::Heap(s));
Self(repr)
}
}
impl<'a> From<Cow<'a, str>> for SmolStr {
#[inline]
fn from(s: Cow<'a, str>) -> SmolStr {
SmolStr::new(s)
}
}
impl From<SmolStr> for Arc<str> {
#[inline(always)]
fn from(text: SmolStr) -> Self {
match text.0 {
Repr::Heap(data) => data,
_ => text.as_str().into(),
}
}
}
impl From<SmolStr> for String {
#[inline(always)]
fn from(text: SmolStr) -> Self {
text.as_str().into()
}
}
impl Borrow<str> for SmolStr {
#[inline(always)]
fn borrow(&self) -> &str {
self.as_str()
}
}
impl FromStr for SmolStr {
type Err = Infallible;
#[inline]
fn from_str(s: &str) -> Result<SmolStr, Self::Err> {
Ok(SmolStr::from(s))
}
}
const INLINE_CAP: usize = InlineSize::_V23 as usize;
const N_NEWLINES: usize = 32;
const N_SPACES: usize = 128;
const WS: &str = "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n ";
const _: () = {
assert!(WS.len() == N_NEWLINES + N_SPACES);
assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n');
assert!(WS.as_bytes()[N_NEWLINES] == b' ');
};
/// A [`u8`] with a bunch of niches.
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(u8)]
enum InlineSize {
_V0 = 0,
_V1,
_V2,
_V3,
_V4,
_V5,
_V6,
_V7,
_V8,
_V9,
_V10,
_V11,
_V12,
_V13,
_V14,
_V15,
_V16,
_V17,
_V18,
_V19,
_V20,
_V21,
_V22,
_V23,
}
impl InlineSize {
/// SAFETY: `value` must be less than or equal to [`INLINE_CAP`]
#[inline(always)]
const unsafe fn transmute_from_u8(value: u8) -> Self {
debug_assert!(value <= InlineSize::_V23 as u8);
// SAFETY: The caller is responsible to uphold this invariant
unsafe { mem::transmute::<u8, Self>(value) }
}
}
#[derive(Clone, Debug)]
enum Repr {
Inline { len: InlineSize, buf: [u8; INLINE_CAP] },
Static(&'static str),
Heap(Arc<str>),
}
impl Repr {
/// Tries to create a new `Repr::Inline` or `Repr::Static`.
/// If that isn't possible, returns `None`.
fn new_on_stack<T>(text: T) -> Option<Self>
where
T: AsRef<str>,
{
let text = text.as_ref();
let len = text.len();
if len <= INLINE_CAP {
let mut buf = [0; INLINE_CAP];
buf[..len].copy_from_slice(text.as_bytes());
return Some(Repr::Inline {
// SAFETY: We know that `len` is less than or equal to the maximum value of `InlineSize`
len: unsafe { InlineSize::transmute_from_u8(len as u8) },
buf,
});
}
if len <= N_NEWLINES + N_SPACES {
let bytes = text.as_bytes();
let possible_newline_count = cmp::min(len, N_NEWLINES);
let newlines =
bytes[..possible_newline_count].iter().take_while(|&&b| b == b'\n').count();
let possible_space_count = len - newlines;
if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {
let spaces = possible_space_count;
let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces];
return Some(Repr::Static(substring));
}
}
None
}
fn new(text: &str) -> Self {
Self::new_on_stack(text).unwrap_or_else(|| Repr::Heap(Arc::from(text)))
}
#[inline(always)]
fn len(&self) -> usize {
match self {
Repr::Heap(data) => data.len(),
Repr::Static(data) => data.len(),
Repr::Inline { len, .. } => *len as usize,
}
}
#[inline(always)]
fn is_empty(&self) -> bool {
match self {
Repr::Heap(data) => data.is_empty(),
Repr::Static(data) => data.is_empty(),
&Repr::Inline { len, .. } => len as u8 == 0,
}
}
#[inline]
fn as_str(&self) -> &str {
match self {
Repr::Heap(data) => data,
Repr::Static(data) => data,
Repr::Inline { len, buf } => {
let len = *len as usize;
// SAFETY: len is guaranteed to be <= INLINE_CAP
let buf = unsafe { buf.get_unchecked(..len) };
// SAFETY: buf is guaranteed to be valid utf8 for ..len bytes
unsafe { ::core::str::from_utf8_unchecked(buf) }
}
}
}
fn ptr_eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Heap(l0), Self::Heap(r0)) => Arc::ptr_eq(l0, r0),
(Self::Static(l0), Self::Static(r0)) => core::ptr::eq(l0, r0),
(Self::Inline { len: l_len, buf: l_buf }, Self::Inline { len: r_len, buf: r_buf }) => {
l_len == r_len && l_buf == r_buf
}
_ => false,
}
}
}
/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating.
///
/// Almost identical to [`ToString`], but converts to `SmolStr` instead.
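///
/// # Examples
///
/// A minimal sketch; the blanket impl below covers any `T: fmt::Display`:
///
/// ```
/// use smol_str::ToSmolStr;
///
/// assert_eq!(42.to_smolstr(), "42");
/// assert_eq!("hi".to_smolstr(), "hi");
/// ```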
pub trait ToSmolStr {
fn to_smolstr(&self) -> SmolStr;
}
/// [`str`] methods producing [`SmolStr`]s.
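///
/// A small usage sketch of two of the methods below:
///
/// ```
/// use smol_str::StrExt;
///
/// assert_eq!("Foo_Bar".to_lowercase_smolstr(), "foo_bar");
/// assert_eq!("foo_bar".replace_smolstr("_", "-"), "foo-bar");
/// ```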
pub trait StrExt: private::Sealed {
/// Returns the lowercase equivalent of this string slice as a new [`SmolStr`],
/// potentially without allocating.
///
/// See [`str::to_lowercase`].
#[must_use = "this returns a new SmolStr without modifying the original"]
fn to_lowercase_smolstr(&self) -> SmolStr;
/// Returns the uppercase equivalent of this string slice as a new [`SmolStr`],
/// potentially without allocating.
///
/// See [`str::to_uppercase`].
#[must_use = "this returns a new SmolStr without modifying the original"]
fn to_uppercase_smolstr(&self) -> SmolStr;
/// Returns the ASCII lowercase equivalent of this string slice as a new [`SmolStr`],
/// potentially without allocating.
///
/// See [`str::to_ascii_lowercase`].
#[must_use = "this returns a new SmolStr without modifying the original"]
fn to_ascii_lowercase_smolstr(&self) -> SmolStr;
/// Returns the ASCII uppercase equivalent of this string slice as a new [`SmolStr`],
/// potentially without allocating.
///
/// See [`str::to_ascii_uppercase`].
#[must_use = "this returns a new SmolStr without modifying the original"]
fn to_ascii_uppercase_smolstr(&self) -> SmolStr;
/// Replaces all matches of a &str with another &str returning a new [`SmolStr`],
/// potentially without allocating.
///
/// See [`str::replace`].
#[must_use = "this returns a new SmolStr without modifying the original"]
fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr;
/// Replaces first N matches of a &str with another &str returning a new [`SmolStr`],
/// potentially without allocating.
///
/// See [`str::replacen`].
#[must_use = "this returns a new SmolStr without modifying the original"]
fn replacen_smolstr(&self, from: &str, to: &str, count: usize) -> SmolStr;
}
impl StrExt for str {
#[inline]
fn to_lowercase_smolstr(&self) -> SmolStr {
let len = self.len();
if len <= INLINE_CAP {
let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_lowercase);
from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_lowercase()))
} else {
self.to_lowercase().into()
}
}
#[inline]
fn to_uppercase_smolstr(&self) -> SmolStr {
let len = self.len();
if len <= INLINE_CAP {
let (buf, rest) = inline_convert_while_ascii(self, u8::to_ascii_uppercase);
from_buf_and_chars(buf, len - rest.len(), rest.chars().flat_map(|c| c.to_uppercase()))
} else {
self.to_uppercase().into()
}
}
#[inline]
fn to_ascii_lowercase_smolstr(&self) -> SmolStr {
let len = self.len();
if len <= INLINE_CAP {
let mut buf = [0u8; INLINE_CAP];
buf[..len].copy_from_slice(self.as_bytes());
buf[..len].make_ascii_lowercase();
SmolStr(Repr::Inline {
// SAFETY: `len` is in bounds
len: unsafe { InlineSize::transmute_from_u8(len as u8) },
buf,
})
} else {
self.to_ascii_lowercase().into()
}
}
#[inline]
fn to_ascii_uppercase_smolstr(&self) -> SmolStr {
let len = self.len();
if len <= INLINE_CAP {
let mut buf = [0u8; INLINE_CAP];
buf[..len].copy_from_slice(self.as_bytes());
buf[..len].make_ascii_uppercase();
SmolStr(Repr::Inline {
// SAFETY: `len` is in bounds
len: unsafe { InlineSize::transmute_from_u8(len as u8) },
buf,
})
} else {
self.to_ascii_uppercase().into()
}
}
#[inline]
fn replace_smolstr(&self, from: &str, to: &str) -> SmolStr {
self.replacen_smolstr(from, to, usize::MAX)
}
#[inline]
fn replacen_smolstr(&self, from: &str, to: &str, mut count: usize) -> SmolStr {
// Fast path for replacing a single ASCII character with another inline.
if let [from_u8] = from.as_bytes()
&& let [to_u8] = to.as_bytes()
{
return if self.len() <= count {
// SAFETY: `from_u8` & `to_u8` are ascii
unsafe { replacen_1_ascii(self, |b| if b == from_u8 { *to_u8 } else { *b }) }
} else {
unsafe {
replacen_1_ascii(self, |b| {
if b == from_u8 && count != 0 {
count -= 1;
*to_u8
} else {
*b
}
})
}
};
}
let mut result = SmolStrBuilder::new();
let mut last_end = 0;
for (start, part) in self.match_indices(from).take(count) {
// SAFETY: `start` is guaranteed to be within the bounds of `self` as per
// `match_indices` and last_end is always less than or equal to `start`
result.push_str(unsafe { self.get_unchecked(last_end..start) });
result.push_str(to);
last_end = start + part.len();
}
// SAFETY: `self.len()` is guaranteed to be within the bounds of `self` and last_end is
// always less than or equal to `self.len()`
result.push_str(unsafe { self.get_unchecked(last_end..self.len()) });
SmolStr::from(result)
}
}
/// SAFETY: `map` fn must only replace ascii with ascii or return unchanged bytes.
#[inline]
unsafe fn replacen_1_ascii(src: &str, mut map: impl FnMut(&u8) -> u8) -> SmolStr {
if src.len() <= INLINE_CAP {
let mut buf = [0u8; INLINE_CAP];
for (idx, b) in src.as_bytes().iter().enumerate() {
buf[idx] = map(b);
}
SmolStr(Repr::Inline {
// SAFETY: `len` is in bounds
len: unsafe { InlineSize::transmute_from_u8(src.len() as u8) },
buf,
})
} else {
let out = src.as_bytes().iter().map(map).collect();
// SAFETY: We replaced ascii with ascii on valid utf8 strings.
unsafe { String::from_utf8_unchecked(out).into() }
}
}
/// Inline version of std fn `convert_while_ascii`. `s` must have len <= 23.
#[inline]
fn inline_convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> ([u8; INLINE_CAP], &str) {
// Process the input in chunks of 16 bytes to enable auto-vectorization.
// Previously the chunk size depended on the size of `usize`,
// but 16 bytes is also the better choice on 32-bit platforms with SSE or NEON.
// The only downside on other platforms would be a bit more loop-unrolling.
const N: usize = 16;
debug_assert!(s.len() <= INLINE_CAP, "only for inline-able strings");
let mut slice = s.as_bytes();
let mut out = [0u8; INLINE_CAP];
let mut out_slice = &mut out[..slice.len()];
let mut is_ascii = [false; N];
while slice.len() >= N {
// SAFETY: checked in loop condition
let chunk = unsafe { slice.get_unchecked(..N) };
// SAFETY: out_slice has at least same length as input slice and gets sliced with the same offsets
let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };
for j in 0..N {
is_ascii[j] = chunk[j] <= 127;
}
// Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk
// size gives the best result, specifically a pmovmsk instruction on x86.
// See https://github.com/llvm/llvm-project/issues/96395 for why LLVM does not
// currently recognize other similar idioms.
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
break;
}
for j in 0..N {
out_chunk[j] = convert(&chunk[j]);
}
slice = unsafe { slice.get_unchecked(N..) };
out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
}
// handle the remainder as individual bytes
while !slice.is_empty() {
let byte = slice[0];
if byte > 127 {
break;
}
// SAFETY: out_slice has at least same length as input slice
unsafe {
*out_slice.get_unchecked_mut(0) = convert(&byte);
}
slice = unsafe { slice.get_unchecked(1..) };
out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
}
unsafe {
// SAFETY: we know this is a valid char boundary
// since we only skipped over leading ascii bytes
let rest = core::str::from_utf8_unchecked(slice);
(out, rest)
}
}
impl<T> ToSmolStr for T
where
T: fmt::Display + ?Sized,
{
fn to_smolstr(&self) -> SmolStr {
format_smolstr!("{}", self)
}
}
mod private {
/// No downstream impls allowed.
pub trait Sealed {}
impl Sealed for str {}
}
/// Formats arguments to a [`SmolStr`], potentially without allocating.
///
/// See [`alloc::format!`] or [`format_args!`] for syntax documentation.
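///
/// # Examples
///
/// A small sketch; a short result like this one stays on the stack:
///
/// ```
/// use smol_str::format_smolstr;
///
/// let s = format_smolstr!("{}-{}", "answer", 42);
/// assert_eq!(s, "answer-42");
/// assert!(!s.is_heap_allocated());
/// ```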
#[macro_export]
macro_rules! format_smolstr {
($($tt:tt)*) => {{
let mut w = $crate::SmolStrBuilder::new();
::core::fmt::Write::write_fmt(&mut w, format_args!($($tt)*)).expect("a formatting trait implementation returned an error");
w.finish()
}};
}
/// A builder that can be used to efficiently build a [`SmolStr`].
///
/// This won't allocate if the final string fits into the inline buffer.
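///
/// # Examples
///
/// A minimal usage sketch:
///
/// ```
/// use smol_str::SmolStrBuilder;
///
/// let mut builder = SmolStrBuilder::new();
/// builder.push_str("hello");
/// builder.push(' ');
/// builder.push_str("world");
/// let s = builder.finish();
/// assert_eq!(s, "hello world");
/// assert!(!s.is_heap_allocated()); // 11 bytes, fits inline
/// ```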
#[derive(Clone, Default, Debug, PartialEq, Eq)]
pub struct SmolStrBuilder(SmolStrBuilderRepr);
#[derive(Clone, Debug, PartialEq, Eq)]
enum SmolStrBuilderRepr {
Inline { len: usize, buf: [u8; INLINE_CAP] },
Heap(String),
}
impl Default for SmolStrBuilderRepr {
#[inline]
fn default() -> Self {
SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 }
}
}
impl SmolStrBuilder {
/// Creates a new empty [`SmolStrBuilder`].
#[must_use]
pub const fn new() -> Self {
Self(SmolStrBuilderRepr::Inline { buf: [0; INLINE_CAP], len: 0 })
}
/// Builds a [`SmolStr`] from `self`.
#[must_use]
pub fn finish(&self) -> SmolStr {
SmolStr(match &self.0 {
&SmolStrBuilderRepr::Inline { len, buf } => {
debug_assert!(len <= INLINE_CAP);
Repr::Inline {
// SAFETY: We know that `value.len` is less than or equal to the maximum value of `InlineSize`
len: unsafe { InlineSize::transmute_from_u8(len as u8) },
buf,
}
}
SmolStrBuilderRepr::Heap(heap) => Repr::new(heap),
})
}
/// Appends the given [`char`] to the end of `self`'s buffer.
pub fn push(&mut self, c: char) {
match &mut self.0 {
SmolStrBuilderRepr::Inline { len, buf } => {
let char_len = c.len_utf8();
let new_len = *len + char_len;
if new_len <= INLINE_CAP {
c.encode_utf8(&mut buf[*len..]);
*len += char_len;
} else {
let mut heap = String::with_capacity(new_len);
// copy existing inline bytes over to the heap
// SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes
unsafe { heap.as_mut_vec().extend_from_slice(&buf[..*len]) };
heap.push(c);
self.0 = SmolStrBuilderRepr::Heap(heap);
}
}
SmolStrBuilderRepr::Heap(h) => h.push(c),
}
}
/// Appends a given string slice onto the end of `self`'s buffer.
pub fn push_str(&mut self, s: &str) {
match &mut self.0 {
SmolStrBuilderRepr::Inline { len, buf } => {
let old_len = *len;
*len += s.len();
// if the new length will fit on the stack (even if it fills it entirely)
if *len <= INLINE_CAP {
buf[old_len..*len].copy_from_slice(s.as_bytes());
return; // skip the heap push below
}
let mut heap = String::with_capacity(*len);
// copy existing inline bytes over to the heap
// SAFETY: inline data is guaranteed to be valid utf8 for `old_len` bytes
unsafe { heap.as_mut_vec().extend_from_slice(&buf[..old_len]) };
heap.push_str(s);
self.0 = SmolStrBuilderRepr::Heap(heap);
}
SmolStrBuilderRepr::Heap(heap) => heap.push_str(s),
}
}
}
impl fmt::Write for SmolStrBuilder {
#[inline]
fn write_str(&mut self, s: &str) -> fmt::Result {
self.push_str(s);
Ok(())
}
}
impl From<SmolStrBuilder> for SmolStr {
fn from(value: SmolStrBuilder) -> Self {
value.finish()
}
}
#[cfg(feature = "arbitrary")]
#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))]
impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
let s = <&str>::arbitrary(u)?;
Ok(SmolStr::new(s))
}
}
#[cfg(feature = "borsh")]
#[cfg_attr(docsrs, doc(cfg(feature = "borsh")))]
mod borsh;
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
mod serde;
#[test]
fn from_buf_and_chars_size_hinted_heap() {
let str = from_buf_and_chars(
*b"abcdefghijklmnopqr00000",
18,
"_0x1x2x3x4x5x6x7x8x9x10x11x12x13".chars(),
);
assert_eq!(str, "abcdefghijklmnopqr_0x1x2x3x4x5x6x7x8x9x10x11x12x13");
}

94
lib/smol_str/src/serde.rs Normal file

@ -0,0 +1,94 @@
use alloc::{string::String, vec::Vec};
use core::fmt;
use serde::de::{Deserializer, Error, Unexpected, Visitor};
use serde_core as serde;
use crate::SmolStr;
// https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125
fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>
where
D: Deserializer<'de>,
{
struct SmolStrVisitor;
impl<'a> Visitor<'a> for SmolStrVisitor {
type Value = SmolStr;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a string")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: Error,
{
Ok(SmolStr::from(v))
}
fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>
where
E: Error,
{
Ok(SmolStr::from(v))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: Error,
{
Ok(SmolStr::from(v))
}
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: Error,
{
match core::str::from_utf8(v) {
Ok(s) => Ok(SmolStr::from(s)),
Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
}
}
fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>
where
E: Error,
{
match core::str::from_utf8(v) {
Ok(s) => Ok(SmolStr::from(s)),
Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
}
}
fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
where
E: Error,
{
match String::from_utf8(v) {
Ok(s) => Ok(SmolStr::from(s)),
Err(e) => Err(Error::invalid_value(Unexpected::Bytes(&e.into_bytes()), &self)),
}
}
}
deserializer.deserialize_str(SmolStrVisitor)
}
impl serde::Serialize for SmolStr {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
self.as_str().serialize(serializer)
}
}
impl<'de> serde::Deserialize<'de> for SmolStr {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
smol_str(deserializer)
}
}

446
lib/smol_str/tests/test.rs Normal file

@ -0,0 +1,446 @@
#![allow(clippy::disallowed_types)]
use std::sync::Arc;
#[cfg(not(miri))]
use proptest::{prop_assert, prop_assert_eq, proptest};
use smol_str::{SmolStr, SmolStrBuilder};
#[test]
#[cfg(target_pointer_width = "64")]
fn smol_str_is_smol() {
assert_eq!(::std::mem::size_of::<SmolStr>(), ::std::mem::size_of::<String>(),);
}
#[test]
fn assert_traits() {
fn f<T: Send + Sync + ::std::fmt::Debug + Clone>() {}
f::<SmolStr>();
}
#[test]
fn conversions() {
let s: SmolStr = "Hello, World!".into();
let s: String = s.into();
assert_eq!(s, "Hello, World!");
let s: SmolStr = Arc::<str>::from("Hello, World!").into();
let s: Arc<str> = s.into();
assert_eq!(s.as_ref(), "Hello, World!");
}
#[test]
fn const_fn_ctor() {
const EMPTY: SmolStr = SmolStr::new_inline("");
const A: SmolStr = SmolStr::new_inline("A");
const HELLO: SmolStr = SmolStr::new_inline("HELLO");
const LONG: SmolStr = SmolStr::new_inline("ABCDEFGHIZKLMNOPQRSTUVW");
assert_eq!(EMPTY, SmolStr::from(""));
assert_eq!(A, SmolStr::from("A"));
assert_eq!(HELLO, SmolStr::from("HELLO"));
assert_eq!(LONG, SmolStr::from("ABCDEFGHIZKLMNOPQRSTUVW"));
}
#[cfg(not(miri))]
fn check_props(std_str: &str, smol: SmolStr) -> Result<(), proptest::test_runner::TestCaseError> {
prop_assert_eq!(smol.as_str(), std_str);
prop_assert_eq!(smol.len(), std_str.len());
prop_assert_eq!(smol.is_empty(), std_str.is_empty());
if smol.len() <= 23 {
prop_assert!(!smol.is_heap_allocated());
}
Ok(())
}
#[cfg(not(miri))]
proptest! {
#[test]
fn roundtrip(s: String) {
check_props(s.as_str(), SmolStr::new(s.clone()))?;
}
#[test]
fn roundtrip_spaces(s in r"( )*") {
check_props(s.as_str(), SmolStr::new(s.clone()))?;
}
#[test]
fn roundtrip_newlines(s in r"\n*") {
check_props(s.as_str(), SmolStr::new(s.clone()))?;
}
#[test]
fn roundtrip_ws(s in r"( |\n)*") {
check_props(s.as_str(), SmolStr::new(s.clone()))?;
}
#[test]
fn from_string_iter(slices in proptest::collection::vec(".*", 1..100)) {
let string: String = slices.iter().map(|x| x.as_str()).collect();
let smol: SmolStr = slices.into_iter().collect();
check_props(string.as_str(), smol)?;
}
#[test]
fn from_str_iter(slices in proptest::collection::vec(".*", 1..100)) {
let string: String = slices.iter().map(|x| x.as_str()).collect();
let smol: SmolStr = slices.iter().collect();
check_props(string.as_str(), smol)?;
}
}
#[cfg(feature = "serde")]
mod serde_tests {
use super::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
#[derive(Serialize, Deserialize)]
struct SmolStrStruct {
pub(crate) s: SmolStr,
pub(crate) vec: Vec<SmolStr>,
pub(crate) map: HashMap<SmolStr, SmolStr>,
}
#[test]
fn test_serde() {
let s = SmolStr::new("Hello, World");
let s = serde_json::to_string(&s).unwrap();
assert_eq!(s, "\"Hello, World\"");
let s: SmolStr = serde_json::from_str(&s).unwrap();
assert_eq!(s, "Hello, World");
}
#[test]
fn test_serde_reader() {
let s = SmolStr::new("Hello, World");
let s = serde_json::to_string(&s).unwrap();
assert_eq!(s, "\"Hello, World\"");
let s: SmolStr = serde_json::from_reader(std::io::Cursor::new(s)).unwrap();
assert_eq!(s, "Hello, World");
}
#[test]
fn test_serde_struct() {
let mut map = HashMap::new();
map.insert(SmolStr::new("a"), SmolStr::new("ohno"));
let struct_ = SmolStrStruct {
s: SmolStr::new("Hello, World"),
vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")],
map,
};
let s = serde_json::to_string(&struct_).unwrap();
let _new_struct: SmolStrStruct = serde_json::from_str(&s).unwrap();
}
#[test]
fn test_serde_struct_reader() {
let mut map = HashMap::new();
map.insert(SmolStr::new("a"), SmolStr::new("ohno"));
let struct_ = SmolStrStruct {
s: SmolStr::new("Hello, World"),
vec: vec![SmolStr::new("Hello, World"), SmolStr::new("Hello, World")],
map,
};
let s = serde_json::to_string(&struct_).unwrap();
let _new_struct: SmolStrStruct = serde_json::from_reader(std::io::Cursor::new(s)).unwrap();
}
#[test]
fn test_serde_hashmap() {
let mut map = HashMap::new();
map.insert(SmolStr::new("a"), SmolStr::new("ohno"));
let s = serde_json::to_string(&map).unwrap();
let _s: HashMap<SmolStr, SmolStr> = serde_json::from_str(&s).unwrap();
}
#[test]
fn test_serde_hashmap_reader() {
let mut map = HashMap::new();
map.insert(SmolStr::new("a"), SmolStr::new("ohno"));
let s = serde_json::to_string(&map).unwrap();
let _s: HashMap<SmolStr, SmolStr> =
serde_json::from_reader(std::io::Cursor::new(s)).unwrap();
}
#[test]
fn test_serde_vec() {
let vec = vec![SmolStr::new(""), SmolStr::new("b")];
let s = serde_json::to_string(&vec).unwrap();
let _s: Vec<SmolStr> = serde_json::from_str(&s).unwrap();
}
#[test]
fn test_serde_vec_reader() {
let vec = vec![SmolStr::new(""), SmolStr::new("b")];
let s = serde_json::to_string(&vec).unwrap();
let _s: Vec<SmolStr> = serde_json::from_reader(std::io::Cursor::new(s)).unwrap();
}
}
#[test]
fn test_search_in_hashmap() {
let mut m = ::std::collections::HashMap::<SmolStr, i32>::new();
m.insert("aaa".into(), 17);
assert_eq!(17, *m.get("aaa").unwrap());
}
#[test]
fn test_from_char_iterator() {
let examples = [
// Simple keyword-like strings
("if", false),
("for", false),
("impl", false),
// Strings containing two-byte characters
("パーティーへ行かないか", true),
("パーティーへ行か", true),
("パーティーへ行_", false),
("和製漢語", false),
("部落格", false),
("사회과학원 어학연구소", true),
// String containing diverse characters
("表ポあA鷗Œé逍Üߪąñ丂㐀𠀀", true),
];
for (raw, is_heap) in &examples {
let s: SmolStr = raw.chars().collect();
assert_eq!(s.as_str(), *raw);
assert_eq!(s.is_heap_allocated(), *is_heap);
}
// String which has too many characters to even consider inlining: Chars::size_hint uses
// (`len` + 3) / 4. With `len` = 93, this results in 24, so `from_iter` will immediately
// heap allocate
let raw = "a".repeat(23 * 4 + 1);
let s: SmolStr = raw.chars().collect();
assert_eq!(s.as_str(), raw);
assert!(s.is_heap_allocated());
}
#[test]
fn test_bad_size_hint_char_iter() {
struct BadSizeHint<I>(I);
impl<T, I: Iterator<Item = T>> Iterator for BadSizeHint<I> {
type Item = T;
fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
fn size_hint(&self) -> (usize, Option<usize>) {
(1024, None)
}
}
let data = "testing";
let collected: SmolStr = BadSizeHint(data.chars()).collect();
let new = SmolStr::new(data);
assert!(!collected.is_heap_allocated());
assert!(!new.is_heap_allocated());
assert_eq!(new, collected);
}
#[test]
fn test_to_smolstr() {
use smol_str::ToSmolStr;
for i in 0..26 {
let a = &"abcdefghijklmnopqrstuvwxyz"[i..];
assert_eq!(a, a.to_smolstr());
assert_eq!(a, smol_str::format_smolstr!("{}", a));
}
}
#[test]
fn test_builder_push_str() {
//empty
let builder = SmolStrBuilder::new();
assert_eq!("", builder.finish());
// inline push
let mut builder = SmolStrBuilder::new();
builder.push_str("a");
builder.push_str("b");
let s = builder.finish();
assert!(!s.is_heap_allocated());
assert_eq!("ab", s);
// inline max push
let mut builder = SmolStrBuilder::new();
builder.push_str(&"a".repeat(23));
let s = builder.finish();
assert!(!s.is_heap_allocated());
assert_eq!("a".repeat(23), s);
// heap push immediate
let mut builder = SmolStrBuilder::new();
builder.push_str(&"a".repeat(24));
let s = builder.finish();
assert!(s.is_heap_allocated());
assert_eq!("a".repeat(24), s);
// heap push succession
let mut builder = SmolStrBuilder::new();
builder.push_str(&"a".repeat(23));
builder.push_str(&"a".repeat(23));
let s = builder.finish();
assert!(s.is_heap_allocated());
assert_eq!("a".repeat(46), s);
// heap push on multibyte char
let mut builder = SmolStrBuilder::new();
builder.push_str("ohnonononononononono!");
builder.push('🤯');
let s = builder.finish();
assert!(s.is_heap_allocated());
assert_eq!("ohnonononononononono!🤯", s);
}
#[test]
fn test_builder_push() {
//empty
let builder = SmolStrBuilder::new();
assert_eq!("", builder.finish());
// inline push
let mut builder = SmolStrBuilder::new();
builder.push('a');
builder.push('b');
let s = builder.finish();
assert!(!s.is_heap_allocated());
assert_eq!("ab", s);
// inline max push
let mut builder = SmolStrBuilder::new();
for _ in 0..23 {
builder.push('a');
}
let s = builder.finish();
assert!(!s.is_heap_allocated());
assert_eq!("a".repeat(23), s);
// heap push
let mut builder = SmolStrBuilder::new();
for _ in 0..24 {
builder.push('a');
}
let s = builder.finish();
assert!(s.is_heap_allocated());
assert_eq!("a".repeat(24), s);
}
#[cfg(test)]
mod test_str_ext {
use smol_str::StrExt;
#[test]
fn large() {
let lowercase = "aaaaaaAAAAAaaaaaaaaaaaaaaaaaaaaaAAAAaaaaaaaaaaaaaa".to_lowercase_smolstr();
assert_eq!(lowercase, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
assert!(lowercase.is_heap_allocated());
}
#[test]
fn to_lowercase() {
let lowercase = "aßΔC".to_lowercase_smolstr();
assert_eq!(lowercase, "aßδc");
assert!(!lowercase.is_heap_allocated());
}
#[test]
fn to_uppercase() {
let uppercase = "aßΔC".to_uppercase_smolstr();
assert_eq!(uppercase, "ASSΔC");
assert!(!uppercase.is_heap_allocated());
}
#[test]
fn to_ascii_lowercase() {
let uppercase = "aßΔC".to_ascii_lowercase_smolstr();
assert_eq!(uppercase, "aßΔc");
assert!(!uppercase.is_heap_allocated());
}
#[test]
fn to_ascii_uppercase() {
let uppercase = "aßΔC".to_ascii_uppercase_smolstr();
assert_eq!(uppercase, "AßΔC");
assert!(!uppercase.is_heap_allocated());
}
#[test]
fn replace() {
let result = "foo_bar_baz".replace_smolstr("ba", "do");
assert_eq!(result, "foo_dor_doz");
assert!(!result.is_heap_allocated());
}
#[test]
fn replacen() {
let result = "foo_bar_baz".replacen_smolstr("ba", "do", 1);
assert_eq!(result, "foo_dor_baz");
assert!(!result.is_heap_allocated());
}
#[test]
fn replacen_1_ascii() {
let result = "foo_bar_baz".replacen_smolstr("o", "u", 1);
assert_eq!(result, "fuo_bar_baz");
assert!(!result.is_heap_allocated());
}
}
#[cfg(feature = "borsh")]
mod borsh_tests {
use borsh::BorshDeserialize;
use smol_str::{SmolStr, ToSmolStr};
use std::io::Cursor;
#[test]
fn borsh_serialize_stack() {
let smolstr_on_stack = "aßΔCaßδc".to_smolstr();
let mut buffer = Vec::new();
borsh::BorshSerialize::serialize(&smolstr_on_stack, &mut buffer).unwrap();
let mut cursor = Cursor::new(buffer);
let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap();
assert_eq!(smolstr_on_stack, decoded);
}
#[test]
fn borsh_serialize_heap() {
let smolstr_on_heap = "aßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδcaßΔCaßδc".to_smolstr();
let mut buffer = Vec::new();
borsh::BorshSerialize::serialize(&smolstr_on_heap, &mut buffer).unwrap();
let mut cursor = Cursor::new(buffer);
let decoded: SmolStr = borsh::BorshDeserialize::deserialize_reader(&mut cursor).unwrap();
assert_eq!(smolstr_on_heap, decoded);
}
#[test]
fn borsh_non_utf8_stack() {
let invalid_utf8: Vec<u8> = vec![0xF0, 0x9F, 0x8F]; // Incomplete UTF-8 sequence
let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) });
let mut buffer = Vec::new();
borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap();
let mut cursor = Cursor::new(buffer);
let result = SmolStr::deserialize_reader(&mut cursor);
assert!(result.is_err());
}
#[test]
fn borsh_non_utf8_heap() {
let invalid_utf8: Vec<u8> = vec![
0xC1, 0x8A, 0x5F, 0xE2, 0x3A, 0x9E, 0x3B, 0xAA, 0x01, 0x08, 0x6F, 0x2F, 0xC0, 0x32,
0xAB, 0xE1, 0x9A, 0x2F, 0x4A, 0x3F, 0x25, 0x0D, 0x8A, 0x2A, 0x19, 0x11, 0xF0, 0x7F,
0x0E, 0x80,
];
let wrong_utf8 = SmolStr::from(unsafe { String::from_utf8_unchecked(invalid_utf8) });
let mut buffer = Vec::new();
borsh::BorshSerialize::serialize(&wrong_utf8, &mut buffer).unwrap();
let mut cursor = Cursor::new(buffer);
let result = SmolStr::deserialize_reader(&mut cursor);
assert!(result.is_err());
}
}


@ -0,0 +1,48 @@
#![allow(clippy::disallowed_methods, clippy::print_stdout)]
#![cfg(not(miri))]
use std::{
env,
path::{Path, PathBuf},
process::{Command, Stdio},
};
fn project_root() -> PathBuf {
PathBuf::from(
env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| env!("CARGO_MANIFEST_DIR").to_owned()),
)
}
fn run(cmd: &str, dir: impl AsRef<Path>) -> Result<(), ()> {
let mut args: Vec<_> = cmd.split_whitespace().collect();
let bin = args.remove(0);
println!("> {}", cmd);
let output = Command::new(bin)
.args(args)
.current_dir(dir)
.stdin(Stdio::null())
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.output()
.map_err(drop)?;
if output.status.success() {
Ok(())
} else {
let stdout = String::from_utf8(output.stdout).map_err(drop)?;
print!("{}", stdout);
Err(())
}
}
#[test]
fn check_code_formatting() {
let dir = project_root();
if run("rustfmt +stable --version", &dir).is_err() {
panic!(
"failed to run rustfmt from toolchain 'stable'; \
please run `rustup component add rustfmt --toolchain stable` to install it.",
);
}
if run("cargo +stable fmt -- --check", &dir).is_err() {
panic!("code is not properly formatted; please format the code by running `cargo fmt`")
}
}


@ -0,0 +1,26 @@
# Changelog
## 1.1.0
* add `TextRange::ordering` method
## 1.0.0 :tada:
* the crate is renamed to `text-size` from `text_unit`
Transition table:
- `TextUnit::of_char(c)` → `TextSize::of(c)`
- `TextUnit::of_str(s)` → `TextSize::of(s)`
- `TextUnit::from_usize(size)` → `TextSize::try_from(size).unwrap_or_else(|| panic!(_))`
- `unit.to_usize()` → `usize::from(size)`
- `TextRange::from_to(from, to)` → `TextRange::new(from, to)`
- `TextRange::offset_len(offset, size)` → `TextRange::from_len(offset, size)`
- `range.start()` → `range.start()`
- `range.end()` → `range.end()`
- `range.len()` → `range.len()`
- `range.is_empty()` → `range.is_empty()`
- `a.is_subrange(b)` → `b.contains_range(a)`
- `a.intersection(b)` → `a.intersect(b)`
- `a.extend_to(b)` → `a.cover(b)`
- `range.contains(offset)` → `range.contains(point)`
- `range.contains_inclusive(offset)` → `range.contains_inclusive(point)`
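A minimal before/after sketch of a migrating call site (names as in the table above; the `text_unit` 0.x lines are shown as comments):
```rust
use text_size::{TextRange, TextSize};

// text_unit 0.x:
//     let size = TextUnit::of_str("hello");
//     let range = TextRange::from_to(TextUnit::from_usize(0), size);

// text-size 1.0:
let size = TextSize::of("hello");
let range = TextRange::new(TextSize::from(0u32), size);
assert_eq!(range.len(), size);
```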

28
lib/text-size/Cargo.toml Normal file

@ -0,0 +1,28 @@
[package]
name = "text-size"
version = "1.1.1"
edition = "2024"
authors = [
"Aleksey Kladov <aleksey.kladov@gmail.com>",
"Christopher Durham (CAD97) <cad97@cad97.com>"
]
description = "Newtypes for text offsets"
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-analyzer/text-size"
documentation = "https://docs.rs/text-size"
[dependencies]
serde = { version = "1.0", optional = true, default-features = false }
[dev-dependencies]
serde_test = "1.0"
static_assertions = "1.1"
[[test]]
name = "serde"
path = "tests/serde.rs"
required-features = ["serde"]
[lints]
workspace = true


@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

23
lib/text-size/LICENSE-MIT Normal file

@ -0,0 +1,23 @@
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

27
lib/text-size/README.md Normal file

@ -0,0 +1,27 @@
# text-size
[![Build Status](https://travis-ci.org/matklad/text-size.svg?branch=master)](https://travis-ci.org/matklad/text-size)
[![Crates.io](https://img.shields.io/crates/v/text-size.svg)](https://crates.io/crates/text-size)
[![API reference](https://docs.rs/text-size/badge.svg)](https://docs.rs/text-size/)
A library that provides newtype wrappers for `u32` and `(u32, u32)` for use as text offsets.
See the [docs](https://docs.rs/text-size/) for more.
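A short sketch of the two types in action (`TextSize::from`, `TextRange::new`, and indexing a `str` by `TextRange` are all part of the public API):
```rust
use text_size::{TextRange, TextSize};

let text = "hello world";
// `TextSize` is an offset into the text; `TextRange` is a pair of offsets.
let start = TextSize::from(6);
let end = TextSize::from(11);
let range = TextRange::new(start, end);
assert_eq!(&text[range], "world");
```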
## License
Licensed under either of
* Apache License, Version 2.0
([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
* MIT license
([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
at your option.
## Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.

32
lib/text-size/src/lib.rs Normal file

@ -0,0 +1,32 @@
//! Newtypes for working with text sizes/ranges in a more type-safe manner.
//!
//! This library can help with two things:
//! * Reducing storage requirements for offsets and ranges, under the
//! assumption that 32 bits is enough.
//! * Providing standard vocabulary types for applications where text ranges
//! are pervasive.
//!
//! However, you should not use this library simply because you work with
//! strings. In the overwhelming majority of cases, using `usize` and
//! `std::ops::Range<usize>` is better. In particular, if you are publishing a
//! library, using only std types in the interface would make it more
//! interoperable. Similarly, if you are writing something like a lexer, which
//! produces, but does not *store* text ranges, then sticking to `usize` would
//! be better.
//!
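//! A minimal sketch of the intended use (offsets and ranges into a `&str`),
//! using only this crate's public API:
//!
//! ```rust
//! use text_size::{TextRange, TextSize};
//!
//! let text = "hello, world";
//! let hello = TextRange::new(TextSize::new(0), TextSize::of("hello"));
//! assert_eq!(&text[hello], "hello");
//! ```
//!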
//! Minimum Supported Rust Version: latest stable.
#![forbid(unsafe_code)]
#![warn(missing_debug_implementations, missing_docs)]
mod range;
mod size;
mod traits;
#[cfg(feature = "serde")]
mod serde_impls;
pub use crate::{range::TextRange, size::TextSize, traits::TextLen};
#[cfg(target_pointer_width = "16")]
compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets");

446
lib/text-size/src/range.rs Normal file

@ -0,0 +1,446 @@
use cmp::Ordering;
use {
crate::TextSize,
std::{
cmp, fmt,
ops::{Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, Sub, SubAssign},
},
};
/// A range in text, represented as a pair of [`TextSize`][struct@TextSize].
///
/// It is a logic error for `start` to be greater than `end`.
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)]
pub struct TextRange {
// Invariant: start <= end
start: TextSize,
end: TextSize,
}
impl fmt::Debug for TextRange {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}..{}", self.start().raw, self.end().raw)
}
}
impl TextRange {
/// Creates a new `TextRange` with the given `start` and `end` (`start..end`).
///
/// # Panics
///
/// Panics if `end < start`.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let start = TextSize::from(5);
/// let end = TextSize::from(10);
/// let range = TextRange::new(start, end);
///
/// assert_eq!(range.start(), start);
/// assert_eq!(range.end(), end);
/// assert_eq!(range.len(), end - start);
/// ```
#[inline]
pub const fn new(start: TextSize, end: TextSize) -> TextRange {
assert!(start.raw <= end.raw);
TextRange { start, end }
}
/// Create a new `TextRange` with the given `offset` and `len` (`offset..offset + len`).
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let text = "0123456789";
///
/// let offset = TextSize::from(2);
/// let length = TextSize::from(5);
/// let range = TextRange::at(offset, length);
///
/// assert_eq!(range, TextRange::new(offset, offset + length));
/// assert_eq!(&text[range], "23456")
/// ```
#[inline]
pub const fn at(offset: TextSize, len: TextSize) -> TextRange {
TextRange::new(offset, TextSize::new(offset.raw + len.raw))
}
/// Create a zero-length range at the specified offset (`offset..offset`).
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let point: TextSize;
/// # point = TextSize::from(3);
/// let range = TextRange::empty(point);
/// assert!(range.is_empty());
/// assert_eq!(range, TextRange::new(point, point));
/// ```
#[inline]
pub const fn empty(offset: TextSize) -> TextRange {
TextRange { start: offset, end: offset }
}
/// Create a range up to the given end (`..end`).
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let point: TextSize;
/// # point = TextSize::from(12);
/// let range = TextRange::up_to(point);
///
/// assert_eq!(range.len(), point);
/// assert_eq!(range, TextRange::new(0.into(), point));
/// assert_eq!(range, TextRange::at(0.into(), point));
/// ```
#[inline]
pub const fn up_to(end: TextSize) -> TextRange {
TextRange { start: TextSize::new(0), end }
}
}
/// Identity methods.
impl TextRange {
/// The start point of this range.
#[inline]
pub const fn start(self) -> TextSize {
self.start
}
/// The end point of this range.
#[inline]
pub const fn end(self) -> TextSize {
self.end
}
/// The size of this range.
#[inline]
pub const fn len(self) -> TextSize {
// HACK for const fn: math on primitives only
TextSize { raw: self.end().raw - self.start().raw }
}
/// Check if this range is empty.
#[inline]
pub const fn is_empty(self) -> bool {
// HACK for const fn: math on primitives only
self.start().raw == self.end().raw
}
}
/// Manipulation methods.
impl TextRange {
/// Check if this range contains an offset.
///
/// The end index is considered excluded.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let (start, end): (TextSize, TextSize);
/// # start = 10.into(); end = 20.into();
/// let range = TextRange::new(start, end);
/// assert!(range.contains(start));
/// assert!(!range.contains(end));
/// ```
#[inline]
pub fn contains(self, offset: TextSize) -> bool {
self.start() <= offset && offset < self.end()
}
/// Check if this range contains an offset.
///
/// The end index is considered included.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let (start, end): (TextSize, TextSize);
/// # start = 10.into(); end = 20.into();
/// let range = TextRange::new(start, end);
/// assert!(range.contains_inclusive(start));
/// assert!(range.contains_inclusive(end));
/// ```
#[inline]
pub fn contains_inclusive(self, offset: TextSize) -> bool {
self.start() <= offset && offset <= self.end()
}
/// Check if this range completely contains another range.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let larger = TextRange::new(0.into(), 20.into());
/// let smaller = TextRange::new(5.into(), 15.into());
/// assert!(larger.contains_range(smaller));
/// assert!(!smaller.contains_range(larger));
///
/// // a range always contains itself
/// assert!(larger.contains_range(larger));
/// assert!(smaller.contains_range(smaller));
/// ```
#[inline]
pub fn contains_range(self, other: TextRange) -> bool {
self.start() <= other.start() && other.end() <= self.end()
}
/// The range covered by both ranges, if it exists.
/// If the ranges touch but do not overlap, the output range is empty.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// assert_eq!(
/// TextRange::intersect(
/// TextRange::new(0.into(), 10.into()),
/// TextRange::new(5.into(), 15.into()),
/// ),
/// Some(TextRange::new(5.into(), 10.into())),
/// );
/// ```
#[inline]
pub fn intersect(self, other: TextRange) -> Option<TextRange> {
let start = cmp::max(self.start(), other.start());
let end = cmp::min(self.end(), other.end());
if end < start {
return None;
}
Some(TextRange::new(start, end))
}
/// Extends the range to cover `other` as well.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// assert_eq!(
/// TextRange::cover(
/// TextRange::new(0.into(), 5.into()),
/// TextRange::new(15.into(), 20.into()),
/// ),
/// TextRange::new(0.into(), 20.into()),
/// );
/// ```
#[inline]
pub fn cover(self, other: TextRange) -> TextRange {
let start = cmp::min(self.start(), other.start());
let end = cmp::max(self.end(), other.end());
TextRange::new(start, end)
}
/// Extends the range to cover the given `offset` as well.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// assert_eq!(
/// TextRange::empty(0.into()).cover_offset(20.into()),
/// TextRange::new(0.into(), 20.into()),
/// )
/// ```
#[inline]
pub fn cover_offset(self, offset: TextSize) -> TextRange {
self.cover(TextRange::empty(offset))
}
/// Add an offset to this range.
///
/// Note that this is not appropriate for changing where a `TextRange` is
/// within some string; rather, it is for changing the reference anchor
/// that the `TextRange` is measured against.
///
/// The unchecked version (`Add::add`) will _always_ panic on overflow,
/// in contrast to primitive integers, which check in debug mode only.
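///
/// # Examples
///
/// A small sketch of the checked arithmetic (crate-local API only):
///
/// ```rust
/// # use text_size::*;
/// let range = TextRange::new(8.into(), 12.into());
/// assert_eq!(range.checked_add(4.into()), Some(TextRange::new(12.into(), 16.into())));
/// assert_eq!(range.checked_add(u32::MAX.into()), None);
/// ```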
#[inline]
pub fn checked_add(self, offset: TextSize) -> Option<TextRange> {
Some(TextRange {
start: self.start.checked_add(offset)?,
end: self.end.checked_add(offset)?,
})
}
/// Subtract an offset from this range.
///
/// Note that this is not appropriate for changing where a `TextRange` is
/// within some string; rather, it is for changing the reference anchor
/// that the `TextRange` is measured against.
///
/// The unchecked version (`Sub::sub`) will _always_ panic on overflow,
/// in contrast to primitive integers, which check in debug mode only.
#[inline]
pub fn checked_sub(self, offset: TextSize) -> Option<TextRange> {
Some(TextRange {
start: self.start.checked_sub(offset)?,
end: self.end.checked_sub(offset)?,
})
}
/// Relative order of the two ranges (overlapping ranges are considered
/// equal).
///
/// This is useful when, for example, binary searching an array of disjoint
/// ranges.
///
/// # Examples
///
/// ```
/// # use text_size::*;
/// # use std::cmp::Ordering;
///
/// let a = TextRange::new(0.into(), 3.into());
/// let b = TextRange::new(4.into(), 5.into());
/// assert_eq!(a.ordering(b), Ordering::Less);
///
/// let a = TextRange::new(0.into(), 3.into());
/// let b = TextRange::new(3.into(), 5.into());
/// assert_eq!(a.ordering(b), Ordering::Less);
///
/// let a = TextRange::new(0.into(), 3.into());
/// let b = TextRange::new(2.into(), 5.into());
/// assert_eq!(a.ordering(b), Ordering::Equal);
///
/// let a = TextRange::new(0.into(), 3.into());
/// let b = TextRange::new(2.into(), 2.into());
/// assert_eq!(a.ordering(b), Ordering::Equal);
///
/// let a = TextRange::new(2.into(), 3.into());
/// let b = TextRange::new(2.into(), 2.into());
/// assert_eq!(a.ordering(b), Ordering::Greater);
/// ```
#[inline]
pub fn ordering(self, other: TextRange) -> Ordering {
if self.end() <= other.start() {
Ordering::Less
} else if other.end() <= self.start() {
Ordering::Greater
} else {
Ordering::Equal
}
}
}
impl Index<TextRange> for str {
type Output = str;
#[inline]
fn index(&self, index: TextRange) -> &str {
&self[Range::<usize>::from(index)]
}
}
impl Index<TextRange> for String {
type Output = str;
#[inline]
fn index(&self, index: TextRange) -> &str {
&self[Range::<usize>::from(index)]
}
}
impl IndexMut<TextRange> for str {
#[inline]
fn index_mut(&mut self, index: TextRange) -> &mut str {
&mut self[Range::<usize>::from(index)]
}
}
impl IndexMut<TextRange> for String {
#[inline]
fn index_mut(&mut self, index: TextRange) -> &mut str {
&mut self[Range::<usize>::from(index)]
}
}
impl RangeBounds<TextSize> for TextRange {
fn start_bound(&self) -> Bound<&TextSize> {
Bound::Included(&self.start)
}
fn end_bound(&self) -> Bound<&TextSize> {
Bound::Excluded(&self.end)
}
}
impl<T> From<TextRange> for Range<T>
where
T: From<TextSize>,
{
#[inline]
fn from(r: TextRange) -> Self {
r.start().into()..r.end().into()
}
}
macro_rules! ops {
(impl $Op:ident for TextRange by fn $f:ident = $op:tt) => {
impl $Op<&TextSize> for TextRange {
type Output = TextRange;
#[inline]
fn $f(self, other: &TextSize) -> TextRange {
self $op *other
}
}
impl<T> $Op<T> for &TextRange
where
TextRange: $Op<T, Output=TextRange>,
{
type Output = TextRange;
#[inline]
fn $f(self, other: T) -> TextRange {
*self $op other
}
}
};
}
impl Add<TextSize> for TextRange {
type Output = TextRange;
#[inline]
fn add(self, offset: TextSize) -> TextRange {
self.checked_add(offset).expect("TextRange +offset overflowed")
}
}
impl Sub<TextSize> for TextRange {
type Output = TextRange;
#[inline]
fn sub(self, offset: TextSize) -> TextRange {
self.checked_sub(offset).expect("TextRange -offset overflowed")
}
}
ops!(impl Add for TextRange by fn add = +);
ops!(impl Sub for TextRange by fn sub = -);
impl<A> AddAssign<A> for TextRange
where
TextRange: Add<A, Output = TextRange>,
{
#[inline]
fn add_assign(&mut self, rhs: A) {
*self = *self + rhs
}
}
impl<S> SubAssign<S> for TextRange
where
TextRange: Sub<S, Output = TextRange>,
{
#[inline]
fn sub_assign(&mut self, rhs: S) {
*self = *self - rhs
}
}

45
lib/text-size/src/serde_impls.rs Normal file

@ -0,0 +1,45 @@
use {
crate::{TextRange, TextSize},
serde::{Deserialize, Deserializer, Serialize, Serializer, de},
};
impl Serialize for TextSize {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
self.raw.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for TextSize {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
u32::deserialize(deserializer).map(TextSize::from)
}
}
impl Serialize for TextRange {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
(self.start(), self.end()).serialize(serializer)
}
}
impl<'de> Deserialize<'de> for TextRange {
#[allow(clippy::nonminimal_bool)]
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
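// A range is serialized as a `(start, end)` tuple; reject any tuple that
// violates the `start <= end` invariant required by `TextRange::new`.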
let (start, end) = Deserialize::deserialize(deserializer)?;
if !(start <= end) {
return Err(de::Error::custom(format!("invalid range: {:?}..{:?}", start, end)));
}
Ok(TextRange::new(start, end))
}
}

172
lib/text-size/src/size.rs Normal file

@ -0,0 +1,172 @@
use {
crate::TextLen,
std::{
convert::TryFrom,
fmt, iter,
num::TryFromIntError,
ops::{Add, AddAssign, Sub, SubAssign},
},
};
/// A measure of text length. Also, equivalently, an index into text.
///
/// This is a UTF-8 byte offset stored as `u32`, but
/// most clients should treat it as an opaque measure.
///
/// For cases that need to escape `TextSize` and return to working directly
/// with primitive integers, `TextSize` can be losslessly converted to/from
/// `u32` via [`From`] conversions, and losslessly converted [`Into`]
/// `usize`. The `usize -> TextSize` direction can be done via [`TryFrom`].
///
/// These escape hatches are primarily required for unit testing and when
/// converting from UTF-8 size to another coordinate space, such as UTF-16.
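///
/// # Examples
///
/// A brief sketch of the escape hatches described above:
///
/// ```rust
/// # use std::convert::TryFrom;
/// # use text_size::TextSize;
/// let size = TextSize::new(4);
/// let raw: u32 = size.into();
/// let index: usize = size.into();
/// assert_eq!((raw, index), (4, 4));
/// assert_eq!(TextSize::try_from(index).unwrap(), size);
/// ```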
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TextSize {
pub(crate) raw: u32,
}
impl fmt::Debug for TextSize {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.raw)
}
}
impl TextSize {
/// Creates a new instance of `TextSize` from a raw `u32`.
#[inline]
pub const fn new(raw: u32) -> TextSize {
TextSize { raw }
}
/// The text size of some primitive text-like object.
///
/// Accepts `char`, `&str`, and `&String`.
///
/// # Examples
///
/// ```rust
/// # use text_size::*;
/// let char_size = TextSize::of('🦀');
/// assert_eq!(char_size, TextSize::from(4));
///
/// let str_size = TextSize::of("rust-analyzer");
/// assert_eq!(str_size, TextSize::from(13));
/// ```
#[inline]
pub fn of<T: TextLen>(text: T) -> TextSize {
text.text_len()
}
}
/// Methods to act like a primitive integer type, where reasonably applicable.
// Last updated for parity with Rust 1.42.0.
impl TextSize {
/// Checked addition. Returns `None` if overflow occurred.
#[inline]
pub const fn checked_add(self, rhs: TextSize) -> Option<TextSize> {
match self.raw.checked_add(rhs.raw) {
Some(raw) => Some(TextSize { raw }),
None => None,
}
}
/// Checked subtraction. Returns `None` if overflow occurred.
#[inline]
pub const fn checked_sub(self, rhs: TextSize) -> Option<TextSize> {
match self.raw.checked_sub(rhs.raw) {
Some(raw) => Some(TextSize { raw }),
None => None,
}
}
}
impl From<u32> for TextSize {
#[inline]
fn from(raw: u32) -> Self {
TextSize { raw }
}
}
impl From<TextSize> for u32 {
#[inline]
fn from(value: TextSize) -> Self {
value.raw
}
}
impl TryFrom<usize> for TextSize {
type Error = TryFromIntError;
#[inline]
fn try_from(value: usize) -> Result<Self, TryFromIntError> {
Ok(u32::try_from(value)?.into())
}
}
impl From<TextSize> for usize {
#[inline]
fn from(value: TextSize) -> Self {
value.raw as usize
}
}
macro_rules! ops {
(impl $Op:ident for TextSize by fn $f:ident = $op:tt) => {
impl $Op<TextSize> for TextSize {
type Output = TextSize;
#[inline]
fn $f(self, other: TextSize) -> TextSize {
TextSize { raw: self.raw $op other.raw }
}
}
impl $Op<&TextSize> for TextSize {
type Output = TextSize;
#[inline]
fn $f(self, other: &TextSize) -> TextSize {
self $op *other
}
}
impl<T> $Op<T> for &TextSize
where
TextSize: $Op<T, Output=TextSize>,
{
type Output = TextSize;
#[inline]
fn $f(self, other: T) -> TextSize {
*self $op other
}
}
};
}
ops!(impl Add for TextSize by fn add = +);
ops!(impl Sub for TextSize by fn sub = -);
impl<A> AddAssign<A> for TextSize
where
TextSize: Add<A, Output = TextSize>,
{
#[inline]
fn add_assign(&mut self, rhs: A) {
*self = *self + rhs
}
}
impl<S> SubAssign<S> for TextSize
where
TextSize: Sub<S, Output = TextSize>,
{
#[inline]
fn sub_assign(&mut self, rhs: S) {
*self = *self - rhs
}
}
impl<A> iter::Sum<A> for TextSize
where
TextSize: Add<A, Output = TextSize>,
{
#[inline]
fn sum<I: Iterator<Item = A>>(iter: I) -> TextSize {
iter.fold(0.into(), Add::add)
}
}

36
lib/text-size/src/traits.rs Normal file

@ -0,0 +1,36 @@
use {crate::TextSize, std::convert::TryInto};
use priv_in_pub::Sealed;
mod priv_in_pub {
pub trait Sealed {}
}
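// `Sealed` lives in a private module, so downstream crates can see and use
// `TextLen` but cannot implement it for new types (the sealed-trait pattern).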
/// Primitives with a textual length that can be passed to [`TextSize::of`].
pub trait TextLen: Copy + Sealed {
/// The textual length of this primitive.
fn text_len(self) -> TextSize;
}
impl Sealed for &'_ str {}
impl TextLen for &'_ str {
#[inline]
fn text_len(self) -> TextSize {
self.len().try_into().unwrap()
}
}
impl Sealed for &'_ String {}
impl TextLen for &'_ String {
#[inline]
fn text_len(self) -> TextSize {
self.as_str().text_len()
}
}
impl Sealed for char {}
impl TextLen for char {
#[inline]
fn text_len(self) -> TextSize {
(self.len_utf8() as u32).into()
}
}


@ -0,0 +1,18 @@
use {
static_assertions::*,
std::{
fmt::Debug,
hash::Hash,
marker::{Send, Sync},
panic::{RefUnwindSafe, UnwindSafe},
},
text_size::*,
};
// auto traits
assert_impl_all!(TextSize: Send, Sync, Unpin, UnwindSafe, RefUnwindSafe);
assert_impl_all!(TextRange: Send, Sync, Unpin, UnwindSafe, RefUnwindSafe);
// common traits
assert_impl_all!(TextSize: Copy, Debug, Default, Hash, Ord);
assert_impl_all!(TextRange: Copy, Debug, Default, Hash, Eq);


@ -0,0 +1,24 @@
use text_size::TextSize;
#[derive(Copy, Clone)]
struct BadRope<'a>(&'a [&'a str]);
impl BadRope<'_> {
fn text_len(self) -> TextSize {
self.0.iter().copied().map(TextSize::of).sum()
}
}
#[test]
fn main() {
let x: char = 'c';
let _ = TextSize::of(x);
let x: &str = "hello";
let _ = TextSize::of(x);
let x: &String = &"hello".into();
let _ = TextSize::of(x);
let _ = BadRope(&[""]).text_len();
}


@ -0,0 +1,8 @@
use text_size::*;
#[test]
fn main() {
let range = TextRange::default();
_ = &""[range];
_ = &String::new()[range];
}


@ -0,0 +1,76 @@
use {std::ops, text_size::*};
fn size(x: u32) -> TextSize {
TextSize::from(x)
}
fn range(x: ops::Range<u32>) -> TextRange {
TextRange::new(x.start.into(), x.end.into())
}
#[test]
fn sum() {
let xs: Vec<TextSize> = vec![size(0), size(1), size(2)];
assert_eq!(xs.iter().sum::<TextSize>(), size(3));
assert_eq!(xs.into_iter().sum::<TextSize>(), size(3));
}
#[test]
fn math() {
assert_eq!(size(10) + size(5), size(15));
assert_eq!(size(10) - size(5), size(5));
}
#[test]
fn checked_math() {
assert_eq!(size(1).checked_add(size(1)), Some(size(2)));
assert_eq!(size(1).checked_sub(size(1)), Some(size(0)));
assert_eq!(size(1).checked_sub(size(2)), None);
assert_eq!(size(!0).checked_add(size(1)), None);
}
#[test]
#[rustfmt::skip]
fn contains() {
assert!( range(2..4).contains_range(range(2..3)));
assert!( ! range(2..4).contains_range(range(1..3)));
}
#[test]
fn intersect() {
assert_eq!(range(1..2).intersect(range(2..3)), Some(range(2..2)));
assert_eq!(range(1..5).intersect(range(2..3)), Some(range(2..3)));
assert_eq!(range(1..2).intersect(range(3..4)), None);
}
#[test]
fn cover() {
assert_eq!(range(1..2).cover(range(2..3)), range(1..3));
assert_eq!(range(1..5).cover(range(2..3)), range(1..5));
assert_eq!(range(1..2).cover(range(4..5)), range(1..5));
}
#[test]
fn cover_offset() {
assert_eq!(range(1..3).cover_offset(size(0)), range(0..3));
assert_eq!(range(1..3).cover_offset(size(1)), range(1..3));
assert_eq!(range(1..3).cover_offset(size(2)), range(1..3));
assert_eq!(range(1..3).cover_offset(size(3)), range(1..3));
assert_eq!(range(1..3).cover_offset(size(4)), range(1..4));
}
#[test]
#[rustfmt::skip]
fn contains_point() {
assert!( ! range(1..3).contains(size(0)));
assert!( range(1..3).contains(size(1)));
assert!( range(1..3).contains(size(2)));
assert!( ! range(1..3).contains(size(3)));
assert!( ! range(1..3).contains(size(4)));
assert!( ! range(1..3).contains_inclusive(size(0)));
assert!( range(1..3).contains_inclusive(size(1)));
assert!( range(1..3).contains_inclusive(size(2)));
assert!( range(1..3).contains_inclusive(size(3)));
assert!( ! range(1..3).contains_inclusive(size(4)));
}


@ -0,0 +1,49 @@
use {serde_test::*, std::ops, text_size::*};
fn size(x: u32) -> TextSize {
TextSize::from(x)
}
fn range(x: ops::Range<u32>) -> TextRange {
TextRange::new(x.start.into(), x.end.into())
}
#[test]
fn size_serialization() {
assert_tokens(&size(00), &[Token::U32(00)]);
assert_tokens(&size(10), &[Token::U32(10)]);
assert_tokens(&size(20), &[Token::U32(20)]);
assert_tokens(&size(30), &[Token::U32(30)]);
}
#[test]
fn range_serialization() {
assert_tokens(
&range(00..10),
&[Token::Tuple { len: 2 }, Token::U32(00), Token::U32(10), Token::TupleEnd],
);
assert_tokens(
&range(10..20),
&[Token::Tuple { len: 2 }, Token::U32(10), Token::U32(20), Token::TupleEnd],
);
assert_tokens(
&range(20..30),
&[Token::Tuple { len: 2 }, Token::U32(20), Token::U32(30), Token::TupleEnd],
);
assert_tokens(
&range(30..40),
&[Token::Tuple { len: 2 }, Token::U32(30), Token::U32(40), Token::TupleEnd],
);
}
#[test]
fn invalid_range_deserialization() {
assert_tokens::<TextRange>(
&range(62..92),
&[Token::Tuple { len: 2 }, Token::U32(62), Token::U32(92), Token::TupleEnd],
);
assert_de_tokens_error::<TextRange>(
&[Token::Tuple { len: 2 }, Token::U32(92), Token::U32(62), Token::TupleEnd],
"invalid range: 92..62",
);
}

13
lib/ungrammar/Cargo.toml Normal file

@ -0,0 +1,13 @@
[package]
name = "ungrammar"
description = "A DSL for describing concrete syntax trees"
version = "1.16.1"
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-analyzer/ungrammar"
edition = "2024"
[dependencies]
# nope
[lints]
workspace = true

21
lib/ungrammar/README.md Normal file

@ -0,0 +1,21 @@
# ungrammar
A DSL for specifying concrete syntax trees.
See the [blog post][post] for an introduction.
See [./rust.ungram](./rust.ungram) for an example.
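For a taste of the notation, here is a tiny (hypothetical) grammar in the same style as the rules in [./rust.ungram](./rust.ungram):

```
// A name is a single token; a path is a `::`-separated chain of names.
Name =
  'ident'

Path =
  (qualifier:Path '::')? segment:Name
```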
## Editor support
- Vim
- [vim-ungrammar][]
- [ungrammar.vim][]
- VSCode
- [ungrammar-tools][]
[post]:
https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html
[vim-ungrammar]: https://github.com/Iron-E/vim-ungrammar
[ungrammar.vim]: https://github.com/drtychai/ungrammar.vim
[ungrammar-tools]: https://github.com/azdavis/ungrammar-tools

666
lib/ungrammar/rust.ungram Normal file

@ -0,0 +1,666 @@
// Note: this grammar file does not reflect the current language, as it is no longer maintained.
// Rust Un-Grammar.
//
// This grammar specifies the structure of Rust's concrete syntax tree.
// It does not specify parsing rules (ambiguities, precedence, etc. are out of scope).
// Tokens are processed -- contextual keywords are recognised, compound operators glued.
//
// Legend:
//
// // -- comment
// Name = -- non-terminal definition
// 'ident' -- token (terminal)
// A B -- sequence
// A | B -- alternation
// A* -- zero or more repetition
// A? -- zero or one repetition
// (A) -- same as A
// label:A -- suggested name for field of AST node
//*************************//
// Names, Paths and Macros //
//*************************//
Name =
'ident' | 'self'
NameRef =
'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self'
Lifetime =
'lifetime_ident'
Path =
(qualifier:Path '::')? segment:PathSegment
PathSegment =
'::'? NameRef
| NameRef GenericArgList?
| NameRef ParamList RetType?
| '<' PathType ('as' PathType)? '>'
GenericArgList =
'::'? '<' (GenericArg (',' GenericArg)* ','?)? '>'
GenericArg =
TypeArg
| AssocTypeArg
| LifetimeArg
| ConstArg
TypeArg =
Type
AssocTypeArg =
NameRef GenericParamList? (':' TypeBoundList | '=' Type)
LifetimeArg =
Lifetime
ConstArg =
Expr
MacroCall =
Attr* Path '!' TokenTree ';'?
TokenTree =
'(' ')'
| '{' '}'
| '[' ']'
MacroItems =
Item*
MacroStmts =
statements:Stmt*
Expr?
//*************************//
// Items //
//*************************//
SourceFile =
'shebang'?
Attr*
Item*
Item =
Const
| Enum
| ExternBlock
| ExternCrate
| Fn
| Impl
| MacroCall
| MacroRules
| MacroDef
| Module
| Static
| Struct
| Trait
| TypeAlias
| Union
| Use
MacroRules =
Attr* Visibility?
'macro_rules' '!' Name
TokenTree
MacroDef =
Attr* Visibility?
'macro' Name args:TokenTree?
body:TokenTree
Module =
Attr* Visibility?
'mod' Name
(ItemList | ';')
ItemList =
'{' Attr* Item* '}'
ExternCrate =
Attr* Visibility?
'extern' 'crate' NameRef Rename? ';'
Rename =
'as' (Name | '_')
Use =
Attr* Visibility?
'use' UseTree ';'
UseTree =
(Path? '::')? ('*' | UseTreeList)
| Path Rename?
UseTreeList =
'{' (UseTree (',' UseTree)* ','?)? '}'
Fn =
Attr* Visibility?
'default'? 'const'? 'async'? 'unsafe'? Abi?
'fn' Name GenericParamList? ParamList RetType? WhereClause?
(body:BlockExpr | ';')
Abi =
'extern' 'string'?
ParamList =
'('(
SelfParam
| (SelfParam ',')? (Param (',' Param)* ','?)?
)')'
| '|' (Param (',' Param)* ','?)? '|'
SelfParam =
Attr* (
('&' Lifetime?)? 'mut'? Name
| 'mut'? Name ':' Type
)
Param =
Attr* (
Pat (':' Type)?
| Type
| '...'
)
RetType =
'->' Type
TypeAlias =
Attr* Visibility?
'default'?
'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
('=' Type)? ';'
Struct =
Attr* Visibility?
'struct' Name GenericParamList? (
WhereClause? (RecordFieldList | ';')
| TupleFieldList WhereClause? ';'
)
RecordFieldList =
'{' fields:(RecordField (',' RecordField)* ','?)? '}'
RecordField =
Attr* Visibility?
Name ':' Type
TupleFieldList =
'(' fields:(TupleField (',' TupleField)* ','?)? ')'
TupleField =
Attr* Visibility?
Type
FieldList =
RecordFieldList
| TupleFieldList
Enum =
Attr* Visibility?
'enum' Name GenericParamList? WhereClause?
VariantList
VariantList =
'{' (Variant (',' Variant)* ','?)? '}'
Variant =
Attr* Visibility?
Name FieldList? ('=' Expr)?
Union =
Attr* Visibility?
'union' Name GenericParamList? WhereClause?
RecordFieldList
// A Data Type.
//
// Not used directly in the grammar, but handy to have anyway.
Adt =
Enum
| Struct
| Union
Const =
Attr* Visibility?
'default'?
'const' (Name | '_') ':' Type
('=' body:Expr)? ';'
Static =
Attr* Visibility?
'static' 'mut'? Name ':' Type
('=' body:Expr)? ';'
Trait =
Attr* Visibility?
'unsafe'? 'auto'?
'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause?
AssocItemList
AssocItemList =
'{' Attr* AssocItem* '}'
AssocItem =
Const
| Fn
| MacroCall
| TypeAlias
Impl =
Attr* Visibility?
'default'? 'unsafe'?
'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause?
AssocItemList
ExternBlock =
Attr* 'unsafe'? Abi ExternItemList
ExternItemList =
'{' Attr* ExternItem* '}'
ExternItem =
Fn
| MacroCall
| Static
| TypeAlias
GenericParamList =
'<' (GenericParam (',' GenericParam)* ','?)? '>'
GenericParam =
ConstParam
| LifetimeParam
| TypeParam
TypeParam =
Attr* Name (':' TypeBoundList?)?
('=' default_type:Type)?
ConstParam =
Attr* 'const' Name ':' Type
('=' default_val:Expr)?
LifetimeParam =
Attr* Lifetime (':' TypeBoundList?)?
WhereClause =
'where' predicates:(WherePred (',' WherePred)* ','?)
WherePred =
('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList?
Visibility =
'pub' ('(' 'in'? Path ')')?
Attr =
'#' '!'? '[' Meta ']'
Meta =
Path ('=' Expr | TokenTree)?
//****************************//
// Statements and Expressions //
//****************************//
Stmt =
';'
| ExprStmt
| Item
| LetStmt
LetStmt =
Attr* 'let' Pat (':' Type)?
'=' initializer:Expr
LetElse?
';'
LetElse =
'else' BlockExpr
ExprStmt =
Expr ';'?
Expr =
ArrayExpr
| AwaitExpr
| BinExpr
| BlockExpr
| BoxExpr
| BreakExpr
| CallExpr
| CastExpr
| ClosureExpr
| ContinueExpr
| FieldExpr
| ForExpr
| IfExpr
| IndexExpr
| Literal
| LoopExpr
| MacroCall
| MacroStmts
| MatchExpr
| MethodCallExpr
| ParenExpr
| PathExpr
| PrefixExpr
| RangeExpr
| RecordExpr
| RefExpr
| ReturnExpr
| TryExpr
| TupleExpr
| WhileExpr
| YieldExpr
| LetExpr
| UnderscoreExpr
Literal =
Attr* value:(
'int_number' | 'float_number'
| 'string' | 'raw_string'
| 'byte_string' | 'raw_byte_string'
| 'true' | 'false'
| 'char' | 'byte'
)
PathExpr =
Attr* Path
StmtList =
'{'
Attr*
statements:Stmt*
tail_expr:Expr?
'}'
RefExpr =
Attr* '&' ('raw' | 'mut' | 'const') Expr
TryExpr =
Attr* Expr '?'
BlockExpr =
Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList
PrefixExpr =
Attr* op:('-' | '!' | '*') Expr
BinExpr =
Attr*
lhs:Expr
op:(
'||' | '&&'
| '==' | '!=' | '<=' | '>=' | '<' | '>'
| '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&'
| '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^='
)
rhs:Expr
CastExpr =
Attr* Expr 'as' Type
ParenExpr =
Attr* '(' Attr* Expr ')'
ArrayExpr =
Attr* '[' Attr* (
(Expr (',' Expr)* ','?)?
| Expr ';' Expr
) ']'
IndexExpr =
Attr* base:Expr '[' index:Expr ']'
TupleExpr =
Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')'
RecordExpr =
Path RecordExprFieldList
RecordExprFieldList =
'{'
Attr*
fields:(RecordExprField (',' RecordExprField)* ','?)?
('..' spread:Expr?)?
'}'
RecordExprField =
Attr* (NameRef ':')? Expr
CallExpr =
Attr* Expr ArgList
ArgList =
'(' args:(Expr (',' Expr)* ','?)? ')'
MethodCallExpr =
Attr* receiver:Expr '.' NameRef GenericArgList? ArgList
FieldExpr =
Attr* Expr '.' NameRef
ClosureExpr =
Attr* 'static'? 'async'? 'move'? ParamList RetType?
body:Expr
IfExpr =
Attr* 'if' condition:Expr then_branch:BlockExpr
('else' else_branch:(IfExpr | BlockExpr))?
LoopExpr =
Attr* Label? 'loop'
loop_body:BlockExpr
ForExpr =
Attr* Label? 'for' Pat 'in' iterable:Expr
loop_body:BlockExpr
WhileExpr =
Attr* Label? 'while' condition:Expr
loop_body:BlockExpr
Label =
Lifetime ':'
BreakExpr =
Attr* 'break' Lifetime? Expr?
ContinueExpr =
Attr* 'continue' Lifetime?
RangeExpr =
Attr* start:Expr? op:('..' | '..=') end:Expr?
MatchExpr =
Attr* 'match' Expr MatchArmList
MatchArmList =
'{'
Attr*
arms:MatchArm*
'}'
MatchArm =
Attr* Pat guard:MatchGuard? '=>' Expr ','?
MatchGuard =
'if' condition:Expr
ReturnExpr =
Attr* 'return' Expr?
YieldExpr =
Attr* 'yield' Expr?
LetExpr =
Attr* 'let' Pat '=' Expr
UnderscoreExpr =
Attr* '_'
AwaitExpr =
Attr* Expr '.' 'await'
BoxExpr =
Attr* 'box' Expr
//*************************//
// Types //
//*************************//
Type =
ArrayType
| DynTraitType
| FnPtrType
| ForType
| ImplTraitType
| InferType
| MacroType
| NeverType
| ParenType
| PathType
| PtrType
| RefType
| SliceType
| TupleType
ParenType =
'(' Type ')'
NeverType =
'!'
MacroType =
MacroCall
PathType =
Path
TupleType =
'(' fields:(Type (',' Type)* ','?)? ')'
PtrType =
'*' ('const' | 'mut') Type
RefType =
'&' Lifetime? 'mut'? Type
ArrayType =
'[' Type ';' Expr ']'
SliceType =
'[' Type ']'
InferType =
'_'
FnPtrType =
'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType?
ForType =
'for' GenericParamList Type
ImplTraitType =
'impl' TypeBoundList
DynTraitType =
'dyn' TypeBoundList
TypeBoundList =
bounds:(TypeBound ('+' TypeBound)* '+'?)
TypeBound =
Lifetime
| ('?' | '~' 'const')? Type
//************************//
// Patterns //
//************************//
Pat =
IdentPat
| BoxPat
| RestPat
| LiteralPat
| MacroPat
| OrPat
| ParenPat
| PathPat
| WildcardPat
| RangePat
| RecordPat
| RefPat
| SlicePat
| TuplePat
| TupleStructPat
| ConstBlockPat
LiteralPat =
Literal
IdentPat =
Attr* 'ref'? 'mut'? Name ('@' Pat)?
WildcardPat =
'_'
RangePat =
// 1..
start:Pat op:('..' | '..=')
// 1..2
| start:Pat op:('..' | '..=') end:Pat
// ..2
| op:('..' | '..=') end:Pat
RefPat =
'&' 'mut'? Pat
RecordPat =
Path RecordPatFieldList
RecordPatFieldList =
'{'
fields:(RecordPatField (',' RecordPatField)* ','?)?
RestPat?
'}'
RecordPatField =
Attr* (NameRef ':')? Pat
TupleStructPat =
Path '(' fields:(Pat (',' Pat)* ','?)? ')'
TuplePat =
'(' fields:(Pat (',' Pat)* ','?)? ')'
ParenPat =
'(' Pat ')'
SlicePat =
'[' (Pat (',' Pat)* ','?)? ']'
PathPat =
Path
OrPat =
(Pat ('|' Pat)* '|'?)
BoxPat =
'box' Pat
RestPat =
Attr* '..'
MacroPat =
MacroCall
ConstBlockPat =
'const' BlockExpr

47
lib/ungrammar/src/error.rs Normal file

@ -0,0 +1,47 @@
//! Boilerplate error definitions.
use std::fmt;
use crate::lexer::Location;
/// A type alias for std's Result with the Error as our error type.
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// An error encountered when parsing a Grammar.
#[derive(Debug)]
pub struct Error {
pub(crate) message: String,
pub(crate) location: Option<Location>,
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(loc) = self.location {
// Report 1-based indices, to match text editors
write!(f, "{}:{}: ", loc.line + 1, loc.column + 1)?
}
write!(f, "{}", self.message)
}
}
impl std::error::Error for Error {}
impl Error {
pub(crate) fn with_location(self, location: Location) -> Error {
Error { location: Some(location), ..self }
}
}
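// `format_err!` builds an `Error` with no location (callers attach one later
// via `with_location`), and `bail!` wraps it in an early `return Err(..)`.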
macro_rules! _format_err {
($($tt:tt)*) => {
$crate::error::Error {
message: format!($($tt)*),
location: None,
}
};
}
pub(crate) use _format_err as format_err;
macro_rules! _bail {
($($tt:tt)*) => { return Err($crate::error::format_err!($($tt)*)) };
}
pub(crate) use _bail as bail;

129
lib/ungrammar/src/lexer.rs Normal file

@ -0,0 +1,129 @@
//! Simple hand-written ungrammar lexer
use crate::error::{Result, bail};
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum TokenKind {
Node(String),
Token(String),
Eq,
Star,
Pipe,
QMark,
Colon,
LParen,
RParen,
}
#[derive(Debug)]
pub(crate) struct Token {
pub(crate) kind: TokenKind,
pub(crate) loc: Location,
}
#[derive(Copy, Clone, Default, Debug)]
pub(crate) struct Location {
pub(crate) line: usize,
pub(crate) column: usize,
}
impl Location {
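/// Advance past `text`: bump `line` by the number of newlines it contains,
/// and recompute `column` as the number of chars after the last newline
/// (columns count chars, not bytes).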
fn advance(&mut self, text: &str) {
match text.rfind('\n') {
Some(idx) => {
self.line += text.chars().filter(|&it| it == '\n').count();
self.column = text[idx + 1..].chars().count();
}
None => self.column += text.chars().count(),
}
}
}
pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> {
let mut res = Vec::new();
let mut loc = Location::default();
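// Alternate between stripping trivia (whitespace and `//` comments) and
// lexing a single token: when a pass strips nothing, the next character
// must begin a token. `loc` advances over whatever was consumed.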
while !input.is_empty() {
let old_input = input;
skip_ws(&mut input);
skip_comment(&mut input);
if old_input.len() == input.len() {
match advance(&mut input) {
Ok(kind) => {
res.push(Token { kind, loc });
}
Err(err) => return Err(err.with_location(loc)),
}
}
let consumed = old_input.len() - input.len();
loc.advance(&old_input[..consumed]);
}
Ok(res)
}
fn skip_ws(input: &mut &str) {
*input = input.trim_start_matches(is_whitespace)
}
fn skip_comment(input: &mut &str) {
if input.starts_with("//") {
let idx = input.find('\n').map_or(input.len(), |it| it + 1);
*input = &input[idx..]
}
}
fn advance(input: &mut &str) -> Result<TokenKind> {
let mut chars = input.chars();
let c = chars.next().unwrap();
let res = match c {
'=' => TokenKind::Eq,
'*' => TokenKind::Star,
'?' => TokenKind::QMark,
'(' => TokenKind::LParen,
')' => TokenKind::RParen,
'|' => TokenKind::Pipe,
':' => TokenKind::Colon,
'\'' => {
let mut buf = String::new();
loop {
match chars.next() {
None => bail!("unclosed token literal"),
Some('\\') => match chars.next() {
Some(c) if is_escapable(c) => buf.push(c),
_ => bail!("invalid escape in token literal"),
},
Some('\'') => break,
Some(c) => buf.push(c),
}
}
TokenKind::Token(buf)
}
c if is_ident_char(c) => {
let mut buf = String::new();
buf.push(c);
loop {
match chars.clone().next() {
Some(c) if is_ident_char(c) => {
chars.next();
buf.push(c);
}
_ => break,
}
}
TokenKind::Node(buf)
}
'\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"),
c => bail!("unexpected character: `{}`", c),
};
*input = chars.as_str();
Ok(res)
}
fn is_escapable(c: char) -> bool {
matches!(c, '\\' | '\'')
}
fn is_whitespace(c: char) -> bool {
matches!(c, ' ' | '\t' | '\n')
}
fn is_ident_char(c: char) -> bool {
matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}

137
lib/ungrammar/src/lib.rs Normal file

@ -0,0 +1,137 @@
//! Ungrammar -- a DSL for specifying concrete syntax tree grammar.
//!
//! Producing a parser is an explicit non-goal -- it's ok for this grammar to be
//! ambiguous, non-LL, non-LR, etc.
//!
//! See this
//! [introductory post](https://rust-analyzer.github.io/blog/2020/10/24/introducing-ungrammar.html)
//! for details.
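//!
//! A minimal sketch of the public API (parse a grammar, walk its contents):
//!
//! ```rust
//! use ungrammar::Grammar;
//!
//! let grammar: Grammar = "Pair = 'lhs' 'rhs'".parse().unwrap();
//! assert_eq!(grammar.iter().count(), 1);
//! assert_eq!(grammar.tokens().count(), 2);
//! ```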
#![deny(missing_debug_implementations)]
#![deny(missing_docs)]
#![deny(rust_2018_idioms)]
mod error;
mod lexer;
mod parser;
use std::{ops, str::FromStr};
pub use error::{Error, Result};
/// Returns a Rust grammar.
pub fn rust_grammar() -> Grammar {
let src = include_str!("../rust.ungram");
src.parse().unwrap()
}
/// A node, like `A = 'b' | 'c'`.
///
/// Indexing into a [`Grammar`] with a [`Node`] returns a reference to a
/// [`NodeData`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Node(usize);
/// A token, denoted with single quotes, like `'+'` or `'struct'`.
///
/// Indexing into a [`Grammar`] with a [`Token`] returns a reference to a
/// [`TokenData`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token(usize);
/// An Ungrammar grammar.
#[derive(Default, Debug)]
pub struct Grammar {
nodes: Vec<NodeData>,
tokens: Vec<TokenData>,
}
impl FromStr for Grammar {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
let tokens = lexer::tokenize(s)?;
parser::parse(tokens)
}
}
impl Grammar {
/// Returns an iterator over all nodes in the grammar.
pub fn iter(&self) -> impl Iterator<Item = Node> + '_ {
(0..self.nodes.len()).map(Node)
}
/// Returns an iterator over all tokens in the grammar.
pub fn tokens(&self) -> impl Iterator<Item = Token> + '_ {
(0..self.tokens.len()).map(Token)
}
}
impl ops::Index<Node> for Grammar {
type Output = NodeData;
fn index(&self, Node(index): Node) -> &NodeData {
&self.nodes[index]
}
}
impl ops::Index<Token> for Grammar {
type Output = TokenData;
fn index(&self, Token(index): Token) -> &TokenData {
&self.tokens[index]
}
}
/// Data about a node.
#[derive(Debug)]
pub struct NodeData {
/// The name of the node.
///
/// In the rule `A = 'b' | 'c'`, this is `"A"`.
pub name: String,
/// The rule for this node.
///
/// In the rule `A = 'b' | 'c'`, this represents `'b' | 'c'`.
pub rule: Rule,
}
/// Data about a token.
#[derive(Debug)]
pub struct TokenData {
/// The name of the token.
pub name: String,
}
/// A production rule.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Rule {
/// A labeled rule, like `a:B` (`"a"` is the label, `B` is the rule).
Labeled {
/// The label.
label: String,
/// The rule.
rule: Box<Rule>,
},
/// A node, like `A`.
Node(Node),
/// A token, like `'struct'`.
Token(Token),
/// A sequence of rules, like `'while' '(' Expr ')' Stmt`.
Seq(Vec<Rule>),
/// An alternative between many rules, like `'+' | '-' | '*' | '/'`.
Alt(Vec<Rule>),
/// An optional rule, like `A?`.
Opt(Box<Rule>),
/// A repeated rule, like `A*`.
Rep(Box<Rule>),
}
#[test]
fn smoke() {
let grammar = include_str!("../ungrammar.ungram");
let grammar = grammar.parse::<Grammar>().unwrap();
drop(grammar)
}
#[test]
fn test_rust_grammar() {
let _ = rust_grammar();
}

207
lib/ungrammar/src/parser.rs Normal file

@ -0,0 +1,207 @@
//! Simple hand-written ungrammar parser.
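//!
//! Informally, the shape it accepts per node definition is:
//!
//! ```text
//! rule      = seq_rule ('|' seq_rule)*
//! seq_rule  = atom_rule+
//! atom_rule = (label ':')? (node | 'token' | '(' rule ')') ('?' | '*')?
//! ```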
#![allow(clippy::disallowed_types)]
use std::collections::HashMap;
use crate::{
Grammar, Node, NodeData, Rule, Token, TokenData,
error::{Result, format_err},
lexer::{self, TokenKind},
};
macro_rules! bail {
($loc:expr, $($tt:tt)*) => {{
let err = $crate::error::format_err!($($tt)*)
.with_location($loc);
return Err(err);
}};
}
pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> {
let mut p = Parser::new(tokens);
while !p.is_eof() {
node(&mut p)?;
}
p.finish()
}
#[derive(Default)]
struct Parser {
grammar: Grammar,
tokens: Vec<lexer::Token>,
node_table: HashMap<String, Node>,
token_table: HashMap<String, Token>,
}
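// Placeholder for a node that has been referenced but not yet defined;
// `Parser::finish` reports any node still carrying it as undefined.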
const DUMMY_RULE: Rule = Rule::Node(Node(!0));
impl Parser {
fn new(mut tokens: Vec<lexer::Token>) -> Parser {
tokens.reverse();
Parser { tokens, ..Parser::default() }
}
fn peek(&self) -> Option<&lexer::Token> {
self.peek_n(0)
}
fn peek_n(&self, n: usize) -> Option<&lexer::Token> {
self.tokens.iter().nth_back(n)
}
fn bump(&mut self) -> Result<lexer::Token> {
self.tokens.pop().ok_or_else(|| format_err!("unexpected EOF"))
}
fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> {
let token = self.bump()?;
if token.kind != kind {
bail!(token.loc, "unexpected token, expected `{}`", what);
}
Ok(())
}
fn is_eof(&self) -> bool {
self.tokens.is_empty()
}
fn finish(self) -> Result<Grammar> {
for node_data in &self.grammar.nodes {
if matches!(node_data.rule, DUMMY_RULE) {
crate::error::bail!("Undefined node: {}", node_data.name)
}
}
Ok(self.grammar)
}
fn intern_node(&mut self, name: String) -> Node {
let len = self.node_table.len();
let grammar = &mut self.grammar;
*self.node_table.entry(name.clone()).or_insert_with(|| {
grammar.nodes.push(NodeData { name, rule: DUMMY_RULE });
Node(len)
})
}
fn intern_token(&mut self, name: String) -> Token {
let len = self.token_table.len();
let grammar = &mut self.grammar;
*self.token_table.entry(name.clone()).or_insert_with(|| {
grammar.tokens.push(TokenData { name });
Token(len)
})
}
}
fn node(p: &mut Parser) -> Result<()> {
let token = p.bump()?;
let node = match token.kind {
TokenKind::Node(it) => p.intern_node(it),
_ => bail!(token.loc, "expected ident"),
};
p.expect(TokenKind::Eq, "=")?;
if !matches!(p.grammar[node].rule, DUMMY_RULE) {
bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name)
}
let rule = rule(p)?;
p.grammar.nodes[node.0].rule = rule;
Ok(())
}
fn rule(p: &mut Parser) -> Result<Rule> {
if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() {
bail!(
*loc,
"The first element in a sequence of productions or alternatives \
must not have a leading pipe (`|`)"
);
}
let lhs = seq_rule(p)?;
let mut alt = vec![lhs];
while let Some(token) = p.peek() {
if token.kind != TokenKind::Pipe {
break;
}
p.bump()?;
let rule = seq_rule(p)?;
alt.push(rule)
}
let res = if alt.len() == 1 { alt.pop().unwrap() } else { Rule::Alt(alt) };
Ok(res)
}
fn seq_rule(p: &mut Parser) -> Result<Rule> {
let lhs = atom_rule(p)?;
let mut seq = vec![lhs];
while let Some(rule) = opt_atom_rule(p)? {
seq.push(rule)
}
let res = if seq.len() == 1 { seq.pop().unwrap() } else { Rule::Seq(seq) };
Ok(res)
}
fn atom_rule(p: &mut Parser) -> Result<Rule> {
match opt_atom_rule(p)? {
Some(it) => Ok(it),
None => {
let token = p.bump()?;
bail!(token.loc, "unexpected token")
}
}
}
fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
let token = match p.peek() {
Some(it) => it,
None => return Ok(None),
};
let mut res = match &token.kind {
TokenKind::Node(name) => {
if let Some(lookahead) = p.peek_n(1) {
match lookahead.kind {
TokenKind::Eq => return Ok(None),
TokenKind::Colon => {
let label = name.clone();
p.bump()?;
p.bump()?;
let rule = atom_rule(p)?;
let res = Rule::Labeled { label, rule: Box::new(rule) };
return Ok(Some(res));
}
_ => (),
}
}
match p.peek_n(1) {
Some(token) if token.kind == TokenKind::Eq => return Ok(None),
_ => (),
}
let name = name.clone();
p.bump()?;
let node = p.intern_node(name);
Rule::Node(node)
}
TokenKind::Token(name) => {
let name = name.clone();
p.bump()?;
let token = p.intern_token(name);
Rule::Token(token)
}
TokenKind::LParen => {
p.bump()?;
let rule = rule(p)?;
p.expect(TokenKind::RParen, ")")?;
rule
}
_ => return Ok(None),
};
if let Some(token) = p.peek() {
match &token.kind {
TokenKind::QMark => {
p.bump()?;
res = Rule::Opt(Box::new(res));
}
TokenKind::Star => {
p.bump()?;
res = Rule::Rep(Box::new(res));
}
_ => (),
}
}
Ok(Some(res))
}

16
lib/ungrammar/ungrammar.ungram Normal file

@ -0,0 +1,16 @@
/// ungrammar for ungrammar
Grammar =
Node *
Node =
name:'ident' '=' Rule
Rule =
'ident'
| 'token_ident'
| Rule *
| Rule ( '|' Rule) *
| Rule '?'
| Rule '*'
| '(' Rule ')'
| label:'ident' ':' Rule

12
lib/ungrammar/ungrammar2json/Cargo.toml Normal file

@ -0,0 +1,12 @@
[package]
name = "ungrammar2json"
description = "Convert ungrammar files to JSON"
version = "1.0.0"
license = "MIT OR Apache-2.0"
repository = "https://github.com/matklad/ungrammar"
authors = ["Aleksey Kladov <aleksey.kladov@gmail.com>"]
edition = "2024"
[dependencies]
write-json = "0.1.1"
ungrammar = { path = "../", version = "1.1.0" }

78
lib/ungrammar/ungrammar2json/src/main.rs Normal file

@ -0,0 +1,78 @@
#![allow(clippy::print_stderr, clippy::print_stdout)]
use std::{
env,
io::{self, Read},
process,
};
use ungrammar::{Grammar, Rule};
fn main() {
if let Err(err) = try_main() {
eprintln!("{}", err);
process::exit(101);
}
}
fn try_main() -> io::Result<()> {
if env::args().count() != 1 {
eprintln!("Usage: ungrammar2json < grammar.ungram > grammar.json");
return Ok(());
}
let grammar = read_stdin()?;
let grammar = grammar
.parse::<Grammar>()
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
let mut buf = String::new();
grammar_to_json(&grammar, write_json::object(&mut buf));
println!("{}", buf);
Ok(())
}
fn read_stdin() -> io::Result<String> {
let mut buf = String::new();
io::stdin().lock().read_to_string(&mut buf)?;
Ok(buf)
}
fn grammar_to_json(grammar: &Grammar, mut obj: write_json::Object<'_>) {
for node in grammar.iter() {
let node = &grammar[node];
rule_to_json(grammar, &node.rule, obj.object(&node.name));
}
}
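// For `A = 'b' | 'c'`, for example, the emitted JSON is:
// `{"A": {"alt": [{"token": "b"}, {"token": "c"}]}}`.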
fn rule_to_json(grammar: &Grammar, rule: &Rule, mut obj: write_json::Object<'_>) {
match rule {
Rule::Labeled { label, rule } => {
obj.string("label", label);
rule_to_json(grammar, rule, obj.object("rule"))
}
Rule::Node(node) => {
obj.string("node", &grammar[*node].name);
}
Rule::Token(token) => {
obj.string("token", &grammar[*token].name);
}
Rule::Seq(rules) | Rule::Alt(rules) => {
let tag = match rule {
Rule::Seq(_) => "seq",
Rule::Alt(_) => "alt",
_ => unreachable!(),
};
let mut array = obj.array(tag);
for rule in rules {
rule_to_json(grammar, rule, array.object());
}
}
Rule::Opt(arg) | Rule::Rep(arg) => {
let tag = match rule {
Rule::Opt(_) => "opt",
Rule::Rep(_) => "rep",
_ => unreachable!(),
};
rule_to_json(grammar, arg, obj.object(tag));
}
}
}


@ -127,21 +127,24 @@ fn check_cargo_toml(path: &Path, text: String) {
}
fn check_licenses(sh: &Shell) {
const EXPECTED: [&str; 20] = [
const EXPECTED: &[&str] = &[
"(MIT OR Apache-2.0) AND Unicode-3.0",
"0BSD OR MIT OR Apache-2.0",
"Apache-2.0",
"Apache-2.0 / MIT",
"Apache-2.0 OR BSL-1.0",
"Apache-2.0 OR MIT",
"Apache-2.0 WITH LLVM-exception",
"Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT",
"Apache-2.0 WITH LLVM-exception",
"Apache-2.0",
"Apache-2.0/MIT",
"BSD-2-Clause OR Apache-2.0 OR MIT",
"CC0-1.0",
"ISC",
"MIT",
"MIT / Apache-2.0",
"MIT OR Apache-2.0 OR LGPL-2.1-or-later",
"MIT OR Apache-2.0",
"MIT OR Zlib OR Apache-2.0",
"MIT",
"MIT/Apache-2.0",
"MPL-2.0",
"Unicode-3.0",
@ -159,18 +162,20 @@ fn check_licenses(sh: &Shell) {
.collect::<Vec<_>>();
licenses.sort_unstable();
licenses.dedup();
if licenses != EXPECTED {
let mut expected = EXPECTED.to_vec();
expected.sort_unstable();
if licenses != expected {
let mut diff = String::new();
diff.push_str("New Licenses:\n");
for &l in licenses.iter() {
if !EXPECTED.contains(&l) {
if !expected.contains(&l) {
diff += &format!(" {l}\n")
}
}
diff.push_str("\nMissing Licenses:\n");
for l in EXPECTED {
for l in expected {
if !licenses.contains(&l) {
diff += &format!(" {l}\n")
}
@ -178,7 +183,7 @@ fn check_licenses(sh: &Shell) {
panic!("different set of licenses!\n{diff}");
}
assert_eq!(licenses, EXPECTED);
assert_eq!(licenses, expected);
}
fn check_test_attrs(path: &Path, text: &str) {