feat: adding prefixes to dictionary (#2212)

* feat: adding prefixes to dictionary

* feat: `AffixData` for `DictWordMetadata`
This commit is contained in:
Andrew Dunbar 2025-12-11 19:00:29 +00:00 committed by GitHub
parent 6ac8406e29
commit f15778ed28
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 112 additions and 51 deletions

View file

@ -997,6 +997,14 @@
"metadata": { "metadata": {
"//": "not yet implemented" "//": "not yet implemented"
} }
},
"(": {
"#": "prefix property",
"metadata": {
"affix": {
"is_prefix": true
}
}
} }
} }
} }

View file

@ -11157,7 +11157,7 @@ Zworykin/g
Zyrtec/g Zyrtec/g
Zyuganov/g Zyuganov/g
Zzz Zzz
a/~DP a/~DP(
a.m./ a.m./
aah/NV aah/NV
aardvark/~NSg aardvark/~NSg
@ -12585,7 +12585,7 @@ antagonist/~NSg
antagonistic/~JQ antagonistic/~JQ
antagonize/VdSG antagonize/VdSG
antarctic/~J antarctic/~J
ante/~NSgV ante/~NSgV(
anteater/NgS anteater/NgS
antebellum/~J antebellum/~J
antecedence/Nmg antecedence/Nmg
@ -12620,7 +12620,7 @@ anthropomorphise/V!_
anthropomorphism/Nmg anthropomorphism/Nmg
anthropomorphize/V anthropomorphize/V
anthropomorphous/J anthropomorphous/J
anti/~JNSgP anti/~JNSgP(
antiabortion/J antiabortion/J
antiabortionist/NgS antiabortionist/NgS
antiaircraft/JN antiaircraft/JN
@ -12949,7 +12949,7 @@ arbutus/NgS
arc/~NSgVdG arc/~NSgVdG
arcade/~NgSV arcade/~NgSV
arcane/~J arcane/~J
arch/~NgSVGd>J^YpZv arch/~NgSVGd>J^YpZv(
archaeological/~JY archaeological/~JY
archaeologist/~NSg archaeologist/~NSg
archaeology/~Nmg archaeology/~Nmg
@ -13508,7 +13508,7 @@ authorized/~JVtT
authorship/~Nmg authorship/~Nmg
autism/~Nmg autism/~Nmg
autistic/~JN autistic/~JN
auto/~JNgSV auto/~JNgSV(
autobahn/~NSg autobahn/~NSg
autobiographer/NSg autobiographer/NSg
autobiographic/J autobiographic/J
@ -14436,6 +14436,7 @@ bend/~VbG>SNgBZ
bendability/Nmg bendability/Nmg
bender/~Ng bender/~Ng
bendy/J^>N bendy/J^>N
# bene # prefixes that are not also words in their own right don't belong in the dictionary
beneath/~P beneath/~P
benedictine/~ benedictine/~
benediction/NwSg benediction/NwSg
@ -14572,7 +14573,7 @@ bezel/NgS
bezier/NgS bezier/NgS
bf/~N bf/~N
bhaji/N bhaji/N
bi/~J>NSgZ bi/~J>NSgZ(
biannual/JYN biannual/JYN
bias/~NgSVGdJ bias/~NgSVGdJ
biased/~JVtTU biased/~JVtTU
@ -16892,6 +16893,7 @@ center/~NgJVdG
centerboard/NSg centerboard/NSg
centerfold/NgS centerfold/NgS
centerpiece/~NgS centerpiece/~NgS
# centi # prefixes that are not also words in their own right don't belong in the dictionary
centigrade/JN centigrade/JN
centigram/NSg centigram/NSg
centiliter/NgS< centiliter/NgS<
@ -17554,6 +17556,7 @@ circularize/VdSG
circulate/~VdSGr circulate/~VdSGr
circulation/~NSgr circulation/~NSgr
circulatory/JN circulatory/JN
# circum # prefixes that are not also words in their own right don't belong in the dictionary
circumcise/VdSGXn circumcise/VdSGXn
circumcised/JVtTNU circumcised/JVtTNU
circumcision/~Ng circumcision/~Ng
@ -17913,7 +17916,7 @@ clxvi
clxvii clxvii
cm/~ cm/~
cnidarian/NgS cnidarian/NgS
co/~NSIdE co/~NSIdE(
coach/~NgSVdG coach/~NgSVdG
coachload/NS coachload/NS
coachman/N0g coachman/N0g
@ -20368,6 +20371,7 @@ dc/~N
dd/~NSdG dd/~NSdG
dded/K dded/K
dding/K dding/K
# de # prefixes that are not also words in their own right don't belong in the dictionary
deacon/~NgSV deacon/~NgSV
deaconess/NgS deaconess/NgS
dead/~J^Y>NgVXn dead/~J^Y>NgVXn
@ -20439,6 +20443,7 @@ debtor/~NgS
debugger/NSg debugger/NSg
debut/~NgVGd debut/~NgVGd
debutante/NSg debutante/NSg
# deca # prefixes that are not also words in their own right don't belong in the dictionary
decade/~NgS decade/~NgS
decadence/~Nmg decadence/~Nmg
decadency/Nmg decadency/Nmg
@ -21117,6 +21122,7 @@ dextrose/Nmg
dharma/~NwgS dharma/~NwgS
dhoti/NSg dhoti/NSg
dhow/NgS dhow/NgS
# di # prefixes that are not also words in their own right don't belong in the dictionary
diabetes/~Nmg diabetes/~Nmg
diabetic/~JNSg diabetic/~JNSg
diabolic/J diabolic/J
@ -21406,7 +21412,7 @@ dirtball/NS
dirtily/Ry dirtily/Ry
dirtiness/Nmg dirtiness/Nmg
dirty/~J^>VdSGp dirty/~J^>VdSGp
dis/~VNgI dis/~VNgI(
disable/~VdSGJL disable/~VdSGJL
disablement/Ng disablement/Ng
disambiguate/~VSdGn disambiguate/~VSdGn
@ -22399,6 +22405,7 @@ dynamo/~NSg
dynastic/~J dynastic/~J
dynasty/~NSg dynasty/~NSg
dyno/NSg dyno/NSg
# dys # prefixes that are not also words in their own right don't belong in the dictionary
dysentery/~Nmg dysentery/~Nmg
dysfunction/~Nmg dysfunction/~Nmg
dysfunctional/~J dysfunctional/~J
@ -22989,7 +22996,7 @@ emulsification/Nmg
emulsifier/~NgS emulsifier/~NgS
emulsify/Vd>SGnZ emulsify/Vd>SGnZ
emulsion/~NwgSV emulsion/~NwgSV
en/~NSgPI en/~NSgPI(
enable/~Vd>SGZ enable/~Vd>SGZ
enabler/NgS enabler/NgS
enact/~VSdGrL enact/~VSdGrL
@ -23575,6 +23582,7 @@ etude/NSg
etymological/~JY etymological/~JY
etymologist/NSg etymologist/NSg
etymology/~NwSg etymology/~NwSg
# eu # prefixes that are not also words in their own right don't belong in the dictionary
eucalypti/N9 eucalypti/N9
eucalyptus/~N0gS eucalyptus/~N0gS
euchre/NSgVdG euchre/NSgVdG
@ -23682,7 +23690,7 @@ evolutionist/NSg
evolve/~VdSG evolve/~VdSG
ewe/~NSg>Z ewe/~NSg>Z
ewer/Ng ewer/Ng
ex/~NgSVJ ex/~NgSVJ(
exabyte/NgS exabyte/NgS
exacerbate/VGdSn exacerbate/VGdSn
exacerbation/Nwg exacerbation/Nwg
@ -24032,7 +24040,7 @@ extortion/~Ng>Z
extortionate/JY extortionate/JY
extortioner/Ng extortioner/Ng
extortionist/NgS extortionist/NgS
extra/~JNSg extra/~JNSg(
extracellular/~J extracellular/~J
extract/~NgSVdGv extract/~NgSVdGv
extraction/~NwSg extraction/~NwSg
@ -25314,7 +25322,7 @@ forceps/N09g
forcible/~J forcible/~J
forcibly/~Ry forcibly/~Ry
ford/~NgSVdGB ford/~NgSVdGB
fore/~JNgS fore/~JNgS(
forearm/~NSgVGd forearm/~NSgVGd
forebear/NgSV forebear/NgSV
forebode/VGdSNz forebode/VGdSNz
@ -28126,7 +28134,7 @@ hesitate/~VdSGnX
hesitating/VNYU hesitating/VNYU
hesitation/~Ng hesitation/~Ng
hessian/~N hessian/~N
hetero/~JNSg hetero/~JNSg(
heterodox/J heterodox/J
heterodoxy/Nmg heterodoxy/Nmg
heterogeneity/Ng heterogeneity/Ng
@ -28427,7 +28435,7 @@ homily/~NSg
hominid/NSgJ hominid/NSgJ
hominoid/NS hominoid/NS
hominy/Ng hominy/Ng
homo/~NgSJ homo/~NgSJ(
homoerotic/J homoerotic/J
homogeneity/Ng homogeneity/Ng
homogeneous/~JY homogeneous/~JY
@ -28917,6 +28925,7 @@ hymn/~NgSVdG
hymnal/~NgSJ hymnal/~NgSJ
hymnbook/NSg hymnbook/NSg
hype/~NmgSVGd>J hype/~NmgSVGd>J
# hyper # prefixes that are not also words in their own right don't belong in the dictionary
hyperactive/J hyperactive/J
hyperactivity/~Ng hyperactivity/~Ng
hyperaggressive/J hyperaggressive/J
@ -29113,6 +29122,7 @@ ignore/~VGdS
iguana/~NgS iguana/~NgS
ii/~ ii/~
iii/~ iii/~
# il # prefixes that are not also words in their own right don't belong in the dictionary
ilea/N ilea/N
ileitis/Ng ileitis/Ng
ileum/Ng ileum/Ng
@ -29153,6 +29163,7 @@ illustrative/~JY
illustrator/~NSg illustrator/~NSg
illustrious/~JYp illustrious/~JYp
illustriousness/Nmg illustriousness/Nmg
# im # prefixes that are not also words in their own right don't belong in the dictionary
image/~NwSgVdG image/~NwSgVdG
imager/NgS imager/NgS
imagery/~Nmg imagery/~Nmg
@ -29464,7 +29475,7 @@ impure/~JY^>V
impurity/~NSg impurity/~NSg
imputation/NSg imputation/NSg
impute/VdSGB impute/VdSGB
in/~PJRrg # removed `4`, verb senses are obsolete, `NS`, noun sense is marginal in/~PJRrg( # removed `4`, verb senses are obsolete, `NS`, noun sense is marginal
inaccuracy/NwgS inaccuracy/NwgS
inaction/~Nmg inaction/~Nmg
inadequacy/NS inadequacy/NS
@ -30114,7 +30125,7 @@ intent/~NSgJYp
intention/~NgSV intention/~NgSV
intentional/~JYNU intentional/~JYNU
intentness/Ng intentness/Ng
inter/~VSEL inter/~VSEL(
interact/~VGdSNv interact/~VGdSNv
interaction/~NwSg interaction/~NwSg
interactive/~JYN interactive/~JYN
@ -30298,6 +30309,7 @@ intonation/~NSg
intoxicant/NSgJ intoxicant/NSgJ
intoxicate/VdSGJn intoxicate/VdSGJn
intoxication/~Ng intoxication/~Ng
# intra # prefixes that are not also words in their own right don't belong in the dictionary
intracranial/~J intracranial/~J
intramural/~JN intramural/~JN
intramuscular/J intramuscular/J
@ -30316,7 +30328,7 @@ intriguer/Ng
intriguing/~JYV6N intriguing/~JYV6N
intrinsic/~JNgS intrinsic/~JNgS
intrinsically/~Ry intrinsically/~Ry
intro/~NSgV intro/~NSgV(
introduce/~VGdSr introduce/~VGdSr
introduction/~N0gr introduction/~N0gr
introductions/~N9 introductions/~N9
@ -31100,7 +31112,7 @@ killer/~NgJ
killing/~JNgV killing/~JNgV
killjoy/NSg killjoy/NSg
kiln/~NgSVdG kiln/~NgSVdG
kilo/~NgS kilo/~NgS(
kilobit/NSg kilobit/NSg
kilobyte/NSg kilobyte/NSg
kilocoulomb/S kilocoulomb/S
@ -32654,7 +32666,7 @@ mackerel/~NwSg
mackinaw/NSg mackinaw/NSg
mackintosh/~NgS mackintosh/~NgS
macrame/NgV macrame/NgV
macro/~JNSg macro/~JNSg(
macroaggregate/Ng macroaggregate/Ng
macrobiotic/JS macrobiotic/JS
macrobiotics/Nwg macrobiotics/Nwg
@ -33383,7 +33395,7 @@ meeting/~NwgSV
meetinghouse/NSg meetinghouse/NSg
meetup/NgS meetup/NgS
meg/~NSV meg/~NSV
mega/~JN mega/~JN(
megabit/NSg megabit/NSg
megabucks/Ng megabucks/Ng
megabyte/NgS megabyte/NgS
@ -33700,7 +33712,7 @@ mica/~Ng
mice/~N9V mice/~N9V
mick/~NSJ mick/~NSJ
mickey/~NgSV mickey/~NgSV
micro/~JNSgV micro/~JNSgV(
microaggression/NSg microaggression/NSg
microarchitecture/NgS microarchitecture/NgS
microbe/NgS microbe/NgS
@ -33762,7 +33774,7 @@ microtransaction/NSg
microvascular/J microvascular/J
microwave/~NSgVdGB microwave/~NSgVdGB
microwaveable/J microwaveable/J
mid/~JPN mid/~JPN(
midair/J midair/J
midcentury/J midcentury/J
midday/~Ng midday/~Ng
@ -33864,6 +33876,7 @@ millennial/JNgS
millennium/~NgS millennium/~NgS
miller/~Ng miller/~Ng
millet/~Ng millet/~Ng
# milli # prefixes that are not also words in their own right don't belong in the dictionary
milliamp/NgS milliamp/NgS
milliard/Sg milliard/Sg
millibar/NgS millibar/NgS
@ -33929,7 +33942,7 @@ minestrone/Nmg
minesweeper/NSg minesweeper/NSg
mingle/VdGSN mingle/VdGSN
mingy/J mingy/J
mini/~JNgS mini/~JNgS(
miniature/~NgSJV miniature/~NgSJV
miniaturisation/Ng!_ miniaturisation/Ng!_
miniaturise/VGdS!_ miniaturise/VGdS!_
@ -34001,6 +34014,7 @@ mirthful/JYp
mirthfulness/Nmg mirthfulness/Nmg
mirthless/JY mirthless/JY
miry/J>^ miry/J>^
# mis # prefixes that are not also words in their own right don't belong in the dictionary
misaddress/VdSG misaddress/VdSG
misadventure/NwgS misadventure/NwgS
misaligned/JV misaligned/JV
@ -34400,7 +34414,7 @@ monkey/~NgSVdG
monkeyshine/NSg monkeyshine/NSg
monkish/J monkish/J
monkshood/NSg monkshood/NSg
mono/~NgJ mono/~NgJ(
monochromatic/~J monochromatic/~J
monochrome/~NgSJ monochrome/~NgSJ
monocle/NSgd monocle/NSgd
@ -34774,7 +34788,7 @@ mullet/~NgS
mulligan/~NSg mulligan/~NSg
mulligatawny/Ng mulligatawny/Ng
mullion/NSgVd mullion/NSgVd
multi/~N multi/~N(
multibillion/J multibillion/J
multibyte/J multibyte/J
multicellular/J multicellular/J
@ -35273,6 +35287,7 @@ nelson/~NSg
nematode/NSg nematode/NSg
nemeses/N9 nemeses/N9
nemesis/~N0g nemesis/~N0g
# neo # prefixes that are not also words in their own right don't belong in the dictionary
neoclassic/J neoclassic/J
neoclassical/~JN neoclassical/~JN
neoclassicism/Nmg neoclassicism/Nmg
@ -35594,7 +35609,7 @@ nomination's/r
nominative/~JNSg nominative/~JNSg
nominator/~NSge nominator/~NSge
nominee/~NgS nominee/~NgS
non/~N non/~N(
nonabrasive/JN nonabrasive/JN
nonabsorbent/JSg nonabsorbent/JSg
nonacademic/JN nonacademic/JN
@ -36462,6 +36477,7 @@ omission/~NwgS
omit/~VS omit/~VS
omitted/~V omitted/~V
omitting/~VN omitting/~VN
# omni # prefixes that are not also words in their own right don't belong in the dictionary
omnibus/~NgSJV omnibus/~NgSJV
omnidirectional/J omnidirectional/J
omnipotence/Nmg omnipotence/Nmg
@ -36779,7 +36795,7 @@ ourself/Ia1F # I:pronoun a:personal 1:person .~singular F:reflexive (of t
ourselves/~Ia1F: # I:pronoun a:personal 1:person :~plural F:reflexive ourselves/~Ia1F: # I:pronoun a:personal 1:person :~plural F:reflexive
oust/~VGd>SZ oust/~VGd>SZ
ouster/~NgSV ouster/~NgSV
out/~PNSgVGd>JRz out/~PNSgVGd>JRz(
outage/NSg outage/NSg
outargue/VGdS outargue/VGdS
outback/~NgSJV outback/~NgSJV
@ -36942,7 +36958,7 @@ oven/~NgSV
ovenbird/NSg ovenbird/NSg
ovenproof/J ovenproof/J
ovenware/Nmg ovenware/Nmg
over/~JYNgSP over/~JYNgSP(
overabundance/Ng overabundance/Ng
overabundant/J overabundant/J
overachieve/VGd>SZ overachieve/VGd>SZ
@ -37439,7 +37455,7 @@ pampas/Ng
pamper/VdGSN pamper/VdGSN
pamphlet/~NgSV pamphlet/~NgSV
pamphleteer/NgSV pamphleteer/NgSV
pan/~NSgVJ pan/~NSgVJ(
panacea/NSg panacea/NSg
panache/Ng panache/Ng
panama/~NgS panama/~NgS
@ -37528,7 +37544,7 @@ paprika/~NmgJ
papyri/~N9 papyri/~N9
papyrus/~N0g papyrus/~N0g
par/~NSgJ>PVGdZBz par/~NSgJ>PVGdZBz
para/~NgSJ para/~NgSJ(
parable/~NgSVJ parable/~NgSVJ
parabola/N0Sg parabola/N0Sg
parabolæ/N9 parabolæ/N9
@ -38137,6 +38153,7 @@ peppy/J^>Np
pepsin/Ng pepsin/Ng
peptic/JNgS peptic/JNgS
peptide/~NS peptide/~NS
# per # prefixes that are not also words in their own right don't belong in the dictionary
peradventure/Ng peradventure/Ng
perambulate/VGdSXn perambulate/VGdSXn
perambulation/Nwg perambulation/Nwg
@ -39241,7 +39258,7 @@ polonaise/NSgV
polonium/Nmg polonium/Nmg
poltergeist/~NgS poltergeist/~NgS
poltroon/NSgJ poltroon/NSgJ
poly/~NJV poly/~NJV(
polyacrylamide/N polyacrylamide/N
polyamory/NS polyamory/NS
polyandrous/J polyandrous/J
@ -39448,7 +39465,7 @@ possibility/~NSg
possible/~JNSg possible/~JNSg
possibly/~R # adverb of probability/certainty/affirmation; modal adverb possibly/~R # adverb of probability/certainty/affirmation; modal adverb
possum/~NSgV possum/~NSgV
post/~NwgSVGd>PZz post/~NwgSVGd>PZz(
postage/~Nmg postage/~Nmg
postal/~J postal/~J
postbag/NgS postbag/NgS
@ -40034,7 +40051,7 @@ prizefighter/Ng
prizefighting/Ng prizefighting/Ng
prizewinner/NgS prizewinner/NgS
prizewinning/J prizewinning/J
pro/~NSgPJ pro/~NSgPJ(
probabilistic/~J probabilistic/~J
probability/~NSg probability/~NSg
probable/~JNSg probable/~JNSg
@ -40316,6 +40333,7 @@ protein/~NwSg
protest/NwgS protest/NwgS
protestant/~JNgS protestant/~JNgS
protestation/NwgS protestation/NwgS
# proto # prefixes that are not also words in their own right don't belong in the dictionary
protocol/~NwgSV protocol/~NwgSV
proton/~NSg proton/~NSg
protoplasm/Nmg protoplasm/Nmg
@ -40389,7 +40407,7 @@ psaltery/NSg
psephologist/NS psephologist/NS
psephology/N psephology/N
pseud/NS pseud/NS
pseudo/~NSJ pseudo/~NSJ(
pseudocode/NmgG pseudocode/NmgG
pseudonym/~NSg pseudonym/~NSg
pseudonymous/~J pseudonymous/~J
@ -41244,7 +41262,7 @@ razz/NgSVGd
razzmatazz/Ng razzmatazz/Ng
rcpt/N rcpt/N
rd/~N rd/~N
re/PNSgvz re/PNSgvz(
reach/~VdGSNgB reach/~VdGSNgB
reachable/~JNU reachable/~JNU
reacquire/VdSG reacquire/VdSG
@ -42130,7 +42148,7 @@ retributive/J
retrieval/~NSg retrieval/~NSg
retrieve/~Vd>GSNgZB retrieve/~Vd>GSNgZB
retriever/Ng retriever/Ng
retro/~JNmgS retro/~JNmgS(
retroactive/~JY retroactive/~JY
retrofire/NSVGdJ retrofire/NSVGdJ
retrofit/~VSNg retrofit/~VSNg
@ -43736,7 +43754,7 @@ semaphore/NSgVdG
semblance/NSgr semblance/NSgr
semen/~Nmg semen/~Nmg
semester/~NSg semester/~NSg
semi/~NgS semi/~NgS(
semiannual/JYN semiannual/JYN
semiarid/J semiarid/J
semiautomatic/JNgSQ semiautomatic/JNgSQ
@ -46775,7 +46793,7 @@ suasion/NgE
suave/J>Y^Np suave/J>Y^Np
suaveness/Ng suaveness/Ng
suavity/Ng suavity/Ng
sub/~NSgVP sub/~NSgVP(
subaltern/JNgS subaltern/JNgS
subaqua/J subaqua/J
subarctic/~ONJ subarctic/~ONJ
@ -47144,7 +47162,7 @@ suntanning/V6
suntrap/NS suntrap/NS
sunup/Ng sunup/Ng
sup/~V>SNgJZ sup/~V>SNgJZ
super/~JNgV super/~JNgV(
superabundance/NwgS superabundance/NwgS
superabundant/J superabundant/J
superannuate/VGdSn superannuate/VGdSn
@ -47283,6 +47301,7 @@ supremacy/~Ng
supreme/~JYVN supreme/~JYVN
supremo/NS supremo/NS
supt/V supt/V
# sur # prefixes that are not also words in their own right don't belong in the dictionary
surcease/NSgVdG surcease/NSgVdG
surcharge/NSgVdG surcharge/NSgVdG
surcingle/NSgV surcingle/NSgV
@ -48004,6 +48023,7 @@ teetotalism/Ng
teetotaller/NgS!@_ teetotaller/NgS!@_
tektite/NSg tektite/NSg
tel/~N tel/~N
# tele # prefixes that are not also words in their own right don't belong in the dictionary
telecast/~VG>SNgZ telecast/~VG>SNgZ
telecaster/Ng telecaster/Ng
telecom/NgS telecom/NgS
@ -49152,7 +49172,7 @@ tranquilizer/Ng
tranquillise/Vd>SGZ!_ tranquillise/Vd>SGZ!_
tranquilliser/Ng!_ tranquilliser/Ng!_
tranquillity/Ng!_ tranquillity/Ng!_
trans/~JNVi trans/~JNVi(
transact/VdGS transact/VdGS
transaction/~NSg transaction/~NSg
transactional/J transactional/J
@ -49372,6 +49392,7 @@ tress/NgSVE
trestle/~NgS trestle/~NgS
trews/N trews/N
trey/~NgS trey/~NgS
# tri # prefixes that are not also words in their own right don't belong in the dictionary
triad/~NSg triad/~NSg
triage/NmgVd triage/NmgVd
triager/NSg triager/NSg
@ -49900,7 +49921,7 @@ ulterior/J
ultimate/~JYNgV ultimate/~JYNgV
ultimatum/~NgS ultimatum/~NgS
ultimo/~JN ultimo/~JN
ultra/~JNSg ultra/~JNSg(
ultraconservative/JNSg ultraconservative/JNSg
ultrahigh/J ultrahigh/J
ultraist/NSg ultraist/NSg
@ -49929,6 +49950,7 @@ umlaut/NgSV
ump/NSgVGd ump/NSgVGd
umpire/~NgSVGd umpire/~NgSVGd
umpteen/H umpteen/H
# un # prefixes that are not also words in their own right don't belong in the dictionary
unabridged/~JNgS unabridged/~JNgS
unacceptability/Nmg unacceptability/Nmg
unacceptable/~JN unacceptable/~JN
@ -50026,7 +50048,7 @@ undecided/~JNSgV
undefine/VGdS undefine/VGdS
undemonstrative/JY undemonstrative/JY
undeniably/Ry undeniably/Ry
under/~PJN under/~PJN(
underachieve/VGd>SLZ underachieve/VGd>SLZ
underachiever/Ng underachiever/Ng
underact/VSdG underact/VSdG
@ -50259,6 +50281,7 @@ unhealthy/~J^
unhistorical/J unhistorical/J
unholy/~J^ unholy/~J^
unhurt/J unhurt/J
# uni # prefixes that are not also words in their own right don't belong in the dictionary
unibody/NSg unibody/NSg
unicameral/~J unicameral/~J
unicellular/JN unicellular/JN
@ -51037,7 +51060,7 @@ vicar/~NSg
vicarage/~NSg vicarage/~NSg
vicarious/JYp vicarious/JYp
vicariousness/Ng vicariousness/Ng
vice/~NgSVJPe vice/~NgSVJPe(
viced/JVtT viced/JVtT
vicegerent/NSgJ vicegerent/NSgJ
vicennial/JN vicennial/JN
@ -53414,7 +53437,7 @@ pentest/VSdG
pentester/NSg # penetration tester pentester/NSg # penetration tester
pentesting/NmgV6 pentesting/NmgV6
postfix/NgSVdG postfix/NgSVdG
pre/~PNV # !! please check and comment !! dictionaries only list prefix pre- pre/~PNV( # !! please check and comment !! dictionaries only list prefix pre-
preshared/J preshared/J
quadtree/NgS # data structure quadtree/NgS # data structure
quicksort/NgSVdG # algo quicksort/NgSVdG # algo

View file

@ -18,12 +18,20 @@ use crate::{Document, TokenKind, TokenStringExt};
/// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme. /// having their own lexeme, but "Ivy" and "ivy" sharing the same lexeme.
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)] #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct DictWordMetadata { pub struct DictWordMetadata {
/// The main parts of speech which have extra data.
pub noun: Option<NounData>, pub noun: Option<NounData>,
pub pronoun: Option<PronounData>, pub pronoun: Option<PronounData>,
pub verb: Option<VerbData>, pub verb: Option<VerbData>,
pub adjective: Option<AdjectiveData>, pub adjective: Option<AdjectiveData>,
pub adverb: Option<AdverbData>, pub adverb: Option<AdverbData>,
pub conjunction: Option<ConjunctionData>, pub conjunction: Option<ConjunctionData>,
pub determiner: Option<DeterminerData>,
pub affix: Option<AffixData>,
/// Parts of speech which don't have extra data.
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
#[serde(default = "default_false")]
pub preposition: bool,
/// Whether the word is an offensive word.
pub swear: Option<bool>, pub swear: Option<bool>,
/// The dialects this word belongs to. /// The dialects this word belongs to.
/// If no dialects are defined, it can be assumed that the word is /// If no dialects are defined, it can be assumed that the word is
@ -33,11 +41,6 @@ pub struct DictWordMetadata {
/// Orthographic information: letter case, spaces, hyphens, etc. /// Orthographic information: letter case, spaces, hyphens, etc.
#[serde(default = "OrthFlags::empty")] #[serde(default = "OrthFlags::empty")]
pub orth_info: OrthFlags, pub orth_info: OrthFlags,
/// Whether the word is a [determiner](https://en.wikipedia.org/wiki/English_determiners).
pub determiner: Option<DeterminerData>,
/// Whether the word is a [preposition](https://www.merriam-webster.com/dictionary/preposition).
#[serde(default = "default_false")]
pub preposition: bool,
/// Whether the word is considered especially common. /// Whether the word is considered especially common.
#[serde(default = "default_false")] #[serde(default = "default_false")]
pub common: bool, pub common: bool,
@ -189,11 +192,12 @@ impl DictWordMetadata {
adjective: merge!(self.adjective, other.adjective), adjective: merge!(self.adjective, other.adjective),
adverb: merge!(self.adverb, other.adverb), adverb: merge!(self.adverb, other.adverb),
conjunction: merge!(self.conjunction, other.conjunction), conjunction: merge!(self.conjunction, other.conjunction),
determiner: merge!(self.determiner, other.determiner),
affix: merge!(self.affix, other.affix),
preposition: self.preposition || other.preposition,
dialects: self.dialects | other.dialects, dialects: self.dialects | other.dialects,
orth_info: self.orth_info | other.orth_info, orth_info: self.orth_info | other.orth_info,
swear: self.swear.or(other.swear), swear: self.swear.or(other.swear),
determiner: merge!(self.determiner, other.determiner),
preposition: self.preposition || other.preposition,
common: self.common || other.common, common: self.common || other.common,
derived_from: self.derived_from.or(other.derived_from), derived_from: self.derived_from.or(other.derived_from),
pos_tag: self.pos_tag.or(other.pos_tag), pos_tag: self.pos_tag.or(other.pos_tag),
@ -234,6 +238,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
PROPN => { PROPN => {
@ -259,6 +264,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
PRON => { PRON => {
@ -272,6 +278,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
VERB => { VERB => {
@ -293,6 +300,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
AUX => { AUX => {
@ -314,6 +322,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
ADJ => { ADJ => {
@ -327,6 +336,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
ADV => { ADV => {
@ -340,6 +350,7 @@ impl DictWordMetadata {
self.adjective = None; self.adjective = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
ADP => { ADP => {
@ -350,6 +361,7 @@ impl DictWordMetadata {
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = true; self.preposition = true;
} }
DET => { DET => {
@ -359,6 +371,7 @@ impl DictWordMetadata {
self.adjective = None; self.adjective = None;
self.adverb = None; self.adverb = None;
self.conjunction = None; self.conjunction = None;
self.affix = None;
self.preposition = false; self.preposition = false;
self.determiner = Some(DeterminerData::default()); self.determiner = Some(DeterminerData::default());
} }
@ -373,6 +386,7 @@ impl DictWordMetadata {
self.adjective = None; self.adjective = None;
self.adverb = None; self.adverb = None;
self.determiner = None; self.determiner = None;
self.affix = None;
self.preposition = false; self.preposition = false;
} }
_ => {} _ => {}
@ -958,6 +972,22 @@ impl ConjunctionData {
} }
} }
/// Affix-related metadata attached to a dictionary word.
///
/// Every field is optional; `AffixData::or` combines two values by keeping the
/// first `Some` it finds for each field.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AffixData {
/// Whether the word can act as a prefix.
/// NOTE(review): presumably `None` means "not specified" rather than `false` — confirm.
pub is_prefix: Option<bool>,
/// Whether the word can act as a suffix.
/// NOTE(review): presumably `None` means "not specified" rather than `false` — confirm.
pub is_suffix: Option<bool>,
}
impl AffixData {
    /// Merge two sets of affix data, field by field.
    ///
    /// For each field, the value from `self` is kept when it is `Some`;
    /// otherwise the value from `other` is used. A field is `None` in the
    /// result only when it is `None` in both inputs.
    pub fn or(&self, other: &Self) -> Self {
        Self {
            // `Option::or` prefers the receiver, so `self` wins on conflicts.
            is_prefix: self.is_prefix.or(other.is_prefix),
            is_suffix: self.is_suffix.or(other.is_suffix),
        }
    }
}
/// A regional dialect. /// A regional dialect.
/// ///
/// Note: these have bit-shifted values so that they can ergonomically integrate with /// Note: these have bit-shifted values so that they can ergonomically integrate with