mirror of
https://github.com/Automattic/harper.git
synced 2025-12-23 08:48:15 +00:00
chore: mostly annotating verbs and nouns (#1874)
* chore: mostly annotating verbs and nouns Fixes #1873 * fix: added `.vscode/settings.json` instead of `dictionary.dict` * chore: verb work, POS snapshot fmt * fix: "repurpose" Fixes #1874 * chore: add and fix noun properties
This commit is contained in:
parent
7d13327f0f
commit
8664ecb580
15 changed files with 15386 additions and 15396 deletions
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
|
@ -1,3 +0,0 @@
|
|||
{
|
||||
"typescript.tsdk": "node_modules/typescript/lib"
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -44,7 +44,7 @@
|
|||
//! - The `L` suffix means linking verb.
|
||||
//! - The `X` suffix means auxiliary verb.
|
||||
//! - The `B` suffix means base (lemma) form.
|
||||
//! - The `P` suffix means regular past tense & past participle.
|
||||
//! - The `P` suffix means simple past tense & past participle.
|
||||
//! - The `Pr` suffix means progressive form.
|
||||
//! - The `Pt` suffix means simple past tense.
|
||||
//! - The `Pp` suffix means past participle.
|
||||
|
|
@ -147,17 +147,30 @@ fn format_word_tag(word: &WordMetadata) -> String {
|
|||
add_bool(&mut tag, "L", verb.is_linking);
|
||||
add_bool(&mut tag, "X", verb.is_auxiliary);
|
||||
if let Some(forms) = verb.verb_forms {
|
||||
if forms.contains(VerbFormFlags::LEMMA) {
|
||||
tag.push_str("B");
|
||||
// If the Lemma flag is explicitly set, or if no verb forms are set, Lemma is the default.
|
||||
match (
|
||||
forms.contains(VerbFormFlags::LEMMA),
|
||||
forms.contains(VerbFormFlags::PAST),
|
||||
forms.contains(VerbFormFlags::PAST_PARTICIPLE),
|
||||
forms.contains(VerbFormFlags::PRETERITE),
|
||||
forms.contains(VerbFormFlags::PROGRESSIVE),
|
||||
forms.contains(VerbFormFlags::THIRD_PERSON_SINGULAR),
|
||||
) {
|
||||
(true, _, _, _, _, _) | (false, false, false, false, false, false) => {
|
||||
tag.push_str("B")
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
if forms.contains(VerbFormFlags::PAST) {
|
||||
tag.push_str("P");
|
||||
}
|
||||
if forms.contains(VerbFormFlags::PRETERITE) {
|
||||
tag.push_str("Pt");
|
||||
}
|
||||
if forms.contains(VerbFormFlags::PAST_PARTICIPLE) {
|
||||
tag.push_str("Pp");
|
||||
// Regular verbs set both together; Irregular verbs can set them separately.
|
||||
match (
|
||||
forms.contains(VerbFormFlags::PAST),
|
||||
forms.contains(VerbFormFlags::PRETERITE),
|
||||
forms.contains(VerbFormFlags::PAST_PARTICIPLE),
|
||||
) {
|
||||
(true, _, _) | (_, true, true) => tag.push_str("P"),
|
||||
(false, true, false) => tag.push_str("Pt"),
|
||||
(false, false, true) => tag.push_str("Pp"),
|
||||
_ => {}
|
||||
}
|
||||
if forms.contains(VerbFormFlags::PROGRESSIVE) {
|
||||
tag.push_str("g");
|
||||
|
|
@ -165,6 +178,8 @@ fn format_word_tag(word: &WordMetadata) -> String {
|
|||
if forms.contains(VerbFormFlags::THIRD_PERSON_SINGULAR) {
|
||||
tag.push_str("3");
|
||||
}
|
||||
} else {
|
||||
tag.push_str("B");
|
||||
}
|
||||
add(&tag, &mut tags);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1619,18 +1619,6 @@ Suggest:
|
|||
|
||||
|
||||
|
||||
Lint: Agreement (31 priority)
|
||||
Message: |
|
||||
1484 | begin lessons: you’d only have to whisper a hint to Time, and round goes the
|
||||
1485 | clock in a twinkling! Half-past one, time for dinner!”
|
||||
| ^~~~~~~~~~~ `twinkling` is a mass noun.
|
||||
Suggest:
|
||||
- Replace with: “twinkling”
|
||||
- Replace with: “some twinkling”
|
||||
- Replace with: “a piece of twinkling”
|
||||
|
||||
|
||||
|
||||
Lint: WordChoice (63 priority)
|
||||
Message: |
|
||||
1498 | March—just before he went mad, you know—” (pointing with his tea spoon at the
|
||||
|
|
|
|||
|
|
@ -1430,18 +1430,6 @@ Message: |
|
|||
|
||||
|
||||
|
||||
Lint: Spelling (63 priority)
|
||||
Message: |
|
||||
610 | the State wherein they reside. No State shall make or enforce any law which
|
||||
611 | shall abridge the privileges or immunities of citizens of the United States;
|
||||
| ^~~~~~~~~~ Did you mean to spell `immunities` this way?
|
||||
Suggest:
|
||||
- Replace with: “immunity's”
|
||||
- Replace with: “immensities”
|
||||
- Replace with: “immunizes”
|
||||
|
||||
|
||||
|
||||
Lint: Readability (127 priority)
|
||||
Message: |
|
||||
616 | The right of citizens of the United States, who are eighteen years of age or
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -6,304 +6,304 @@
|
|||
# Unlintable Unlintable
|
||||
> -->
|
||||
# Unlintable Unlintable
|
||||
> Part - of - speech tagging
|
||||
# Unlintable NSg/V/J+ . P . N🅪Sg/V+ NSg/V
|
||||
> Part - of - speech tagging
|
||||
# Unlintable NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg
|
||||
>
|
||||
#
|
||||
> In corpus linguistics , part - of - speech tagging ( POS tagging or PoS tagging or
|
||||
# NPr/J/P NSg+ Nᴹ+ . NSg/V/J+ . P . N🅪Sg/V+ NSg/V . NSg+ NSg/V NPr/C NSg+ NSg/V NPr/C
|
||||
> POST ) , also called grammatical tagging is the process of marking up a word in a
|
||||
# NPr🅪/V/P+ . . R/C VP/J J NSg/V VL3 D NSg/V P Nᴹ/Vg/J NSg/V/J/P D/P NSg/V+ NPr/J/P D/P
|
||||
> text ( corpus ) as corresponding to a particular part of speech , based on both its
|
||||
# N🅪Sg/V+ . NSg+ . NSg/R Nᴹ/Vg/J P D/P NSg/J NSg/V/J P N🅪Sg/V+ . VP/J J/P I/C/Dq ISg/D$+
|
||||
> definition and its context . A simplified form of this is commonly taught to
|
||||
# NSg V/C ISg/D$+ N🅪Sg/V+ . D/P VP/J N🅪Sg/V P I/Ddem+ VL3 R V P
|
||||
> school - age children , in the identification of words as nouns , verbs , adjectives ,
|
||||
# N🅪Sg/V . N🅪Sg/V+ NPl+ . NPr/J/P D Nᴹ P NPl/V3+ NSg/R NPl/V3 . NPl/V3+ . NPl/V3 .
|
||||
> In corpus linguistics , part - of - speech tagging ( POS tagging or PoS tagging or
|
||||
# NPr/J/P NSg+ Nᴹ+ . NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg . NSg+ NSg/Vg NPr/C NSg+ NSg/Vg NPr/C
|
||||
> POST ) , also called grammatical tagging is the process of marking up a word in a
|
||||
# NPr🅪/VB/P+ . . R/C VP/J J NSg/Vg VL3 D NSg/VB P Nᴹ/Vg/J NSg/VB/J/P D/P NSg/VB+ NPr/J/P D/P
|
||||
> text ( corpus ) as corresponding to a particular part of speech , based on both its
|
||||
# N🅪Sg/VB+ . NSg+ . NSg/R Nᴹ/Vg/J P D/P NSg/J NSg/VB/J P N🅪Sg/VB+ . VP/J J/P I/C/Dq ISg/D$+
|
||||
> definition and its context . A simplified form of this is commonly taught to
|
||||
# NSg VB/C ISg/D$+ N🅪Sg/VB+ . D/P VP/J N🅪Sg/VB P I/Ddem+ VL3 R VB P
|
||||
> school - age children , in the identification of words as nouns , verbs , adjectives ,
|
||||
# N🅪Sg/VB . N🅪Sg/VB+ NPl+ . NPr/J/P D Nᴹ P NPl/V3+ NSg/R NPl/V3 . NPl/V3+ . NPl/V3 .
|
||||
> adverbs , etc.
|
||||
# NPl/V3 . +
|
||||
>
|
||||
#
|
||||
> Once performed by hand , POS tagging is now done in the context of computational
|
||||
# NSg/C VP/J NSg/J/P NSg/V+ . NSg+ NSg/V VL3 NPr/V/J/C NSg/VPp/J NPr/J/P D N🅪Sg/V P J
|
||||
> linguistics , using algorithms which associate discrete terms , as well as hidden
|
||||
# Nᴹ+ . Nᴹ/Vg/J NPl+ I/C+ NSg/V/J+ J NPl/V3+ . NSg/R NSg/V/J NSg/R V/J
|
||||
> parts of speech , by a set of descriptive tags . POS - tagging algorithms fall into
|
||||
# NPl/V3 P N🅪Sg/V+ . NSg/J/P D/P NPr/V/J P NSg/J NPl/V3+ . NSg+ . NSg/V NPl+ N🅪Sg/VB+ P
|
||||
> two distinctive groups : rule - based and stochastic . E. Brill's tagger , one of the
|
||||
# NSg NSg/J NPl/V3+ . NSg/V+ . VP/J V/C J . ? ? NSg . NSg/I/V/J P D
|
||||
> first and most widely used English POS - taggers , employs rule - based algorithms .
|
||||
# NSg/V/J V/C NSg/I/J/R/Dq R VPPtPp/J NPr🅪/V/J+ NSg+ . NPl . NPl/V3 NSg/V+ . VP/J NPl+ .
|
||||
> Once performed by hand , POS tagging is now done in the context of computational
|
||||
# NSg/C VP/J NSg/J/P NSg/VB+ . NSg+ NSg/Vg VL3 NPr/VB/J/C NSg/VPp/J NPr/J/P D N🅪Sg/VB P J
|
||||
> linguistics , using algorithms which associate discrete terms , as well as hidden
|
||||
# Nᴹ+ . Nᴹ/Vg/J NPl+ I/C+ NSg/VB/J+ J NPl/V3+ . NSg/R NSg/VB/J NSg/R VB/J
|
||||
> parts of speech , by a set of descriptive tags . POS - tagging algorithms fall into
|
||||
# NPl/V3 P N🅪Sg/VB+ . NSg/J/P D/P NPr/VB/J P NSg/J NPl/V3+ . NSg+ . NSg/Vg NPl+ N🅪Sg/VB+ P
|
||||
> two distinctive groups : rule - based and stochastic . E. Brill's tagger , one of the
|
||||
# NSg NSg/J NPl/V3+ . NSg/VB+ . VP/J VB/C J . ? ? NSg . NSg/I/VB/J P D
|
||||
> first and most widely used English POS - taggers , employs rule - based algorithms .
|
||||
# NSg/VB/J VB/C NSg/I/J/R/Dq R VP/J NPr🅪/VB/J+ NSg+ . NPl . NPl/V3 NSg/VB+ . VP/J NPl+ .
|
||||
>
|
||||
#
|
||||
> Principle
|
||||
# N🅪Sg/V+
|
||||
# N🅪Sg/VB+
|
||||
>
|
||||
#
|
||||
> Part - of - speech tagging is harder than just having a list of words and their
|
||||
# NSg/V/J+ . P . N🅪Sg/V+ NSg/V VL3 JC C/P V/J Nᴹ/Vg/J D/P NSg/V P NPl/V3+ V/C D$+
|
||||
> parts of speech , because some words can represent more than one part of speech
|
||||
# NPl/V3 P N🅪Sg/V+ . C/P I/J/R/Dq NPl/V3+ NPr/VX V NPr/I/V/J/R/Dq C/P NSg/I/V/J NSg/V/J P N🅪Sg/V+
|
||||
> at different times , and because some parts of speech are complex . This is not
|
||||
# NSg/P NSg/J NPl/V3+ . V/C C/P I/J/R/Dq NPl/V3 P N🅪Sg/V+ V NSg/V/J . I/Ddem+ VL3 NSg/C
|
||||
> rare — in natural languages ( as opposed to many artificial languages ) , a large
|
||||
# NSg/V/J . NPr/J/P NSg/J+ NPl/V3+ . NSg/R VP/J P NSg/I/J/Dq+ J+ NPl/V3+ . . D/P NSg/J
|
||||
> percentage of word - forms are ambiguous . For example , even " dogs " , which is
|
||||
# N🅪Sg P NSg/V+ . NPl/V3+ V J . C/P NSg/V+ . NSg/V/J . NPl/V3+ . . I/C+ VL3
|
||||
> usually thought of as just a plural noun , can also be a verb :
|
||||
# R N🅪Sg/VPtPp P NSg/R V/J D/P+ NSg/J+ NSg/V+ . NPr/VX R/C NSg/VXB D/P+ NSg/V+ .
|
||||
> Part - of - speech tagging is harder than just having a list of words and their
|
||||
# NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg VL3 JC C/P VB/J Nᴹ/Vg/J D/P NSg/VB P NPl/V3+ VB/C D$+
|
||||
> parts of speech , because some words can represent more than one part of speech
|
||||
# NPl/V3 P N🅪Sg/VB+ . C/P I/J/R/Dq NPl/V3+ NPr/VXB VB NPr/I/VB/J/R/Dq C/P NSg/I/VB/J NSg/VB/J P N🅪Sg/VB+
|
||||
> at different times , and because some parts of speech are complex . This is not
|
||||
# NSg/P NSg/J NPl/V3+ . VB/C C/P I/J/R/Dq NPl/V3 P N🅪Sg/VB+ VB NSg/VB/J . I/Ddem+ VL3 NSg/C
|
||||
> rare — in natural languages ( as opposed to many artificial languages ) , a large
|
||||
# NSg/VB/J . NPr/J/P NSg/J+ NPl/V3+ . NSg/R VP/J P NSg/I/J/Dq+ J+ NPl/V3+ . . D/P NSg/J
|
||||
> percentage of word - forms are ambiguous . For example , even " dogs " , which is
|
||||
# N🅪Sg P NSg/VB+ . NPl/V3+ VB J . C/P NSg/VB+ . NSg/VB/J . NPl/V3+ . . I/C+ VL3
|
||||
> usually thought of as just a plural noun , can also be a verb :
|
||||
# R N🅪Sg/VP P NSg/R VB/J D/P+ NSg/J+ NSg/VB+ . NPr/VXB R/C NSg/VXB D/P+ NSg/VB+ .
|
||||
>
|
||||
#
|
||||
> The sailor dogs the hatch .
|
||||
# D+ NSg+ NPl/V3+ D+ NSg/V+ .
|
||||
> The sailor dogs the hatch .
|
||||
# D+ NSg+ NPl/V3+ D+ NSg/VB+ .
|
||||
>
|
||||
#
|
||||
> Correct grammatical tagging will reflect that " dogs " is here used as a verb , not
|
||||
# NSg/V/J J NSg/V NPr/VX V NSg/I/C/Ddem+ . NPl/V3+ . VL3 NSg/J/R VPPtPp/J NSg/R D/P NSg/V+ . NSg/C
|
||||
> as the more common plural noun . Grammatical context is one way to determine
|
||||
# NSg/R D NPr/I/V/J/R/Dq NSg/V/J NSg/J NSg/V+ . J+ N🅪Sg/V+ VL3 NSg/I/V/J NSg/J P V
|
||||
> this ; semantic analysis can also be used to infer that " sailor " and " hatch "
|
||||
# I/Ddem+ . NSg/J+ N🅪Sg+ NPr/VX R/C NSg/VXB VPPtPp/J P V NSg/I/C/Ddem+ . NSg+ . V/C . NSg/V .
|
||||
> implicate " dogs " as 1 ) in the nautical context and 2 ) an action applied to the
|
||||
# NSg/V . NPl/V3+ . NSg/R # . NPr/J/P D J N🅪Sg/V+ V/C # . D/P N🅪Sg/V/J+ VP/J P D
|
||||
> object " hatch " ( in this context , " dogs " is a nautical term meaning " fastens ( a
|
||||
# NSg/V+ . NSg/V . . NPr/J/P I/Ddem N🅪Sg/V+ . . NPl/V3+ . VL3 D/P J NSg/V/J+ N🅪Sg/Vg/J+ . V3 . D/P
|
||||
> watertight door ) securely " ) .
|
||||
# J NSg/V+ . R . . .
|
||||
> Correct grammatical tagging will reflect that " dogs " is here used as a verb , not
|
||||
# NSg/VB/J J NSg/Vg NPr/VXB VB NSg/I/C/Ddem+ . NPl/V3+ . VL3 NSg/J/R VP/J NSg/R D/P NSg/VB+ . NSg/C
|
||||
> as the more common plural noun . Grammatical context is one way to determine
|
||||
# NSg/R D NPr/I/VB/J/R/Dq NSg/VB/J NSg/J NSg/VB+ . J+ N🅪Sg/VB+ VL3 NSg/I/VB/J NSg/J P VB
|
||||
> this ; semantic analysis can also be used to infer that " sailor " and " hatch "
|
||||
# I/Ddem+ . NSg/J+ N🅪Sg+ NPr/VXB R/C NSg/VXB VP/J P VB NSg/I/C/Ddem+ . NSg+ . VB/C . NSg/VB .
|
||||
> implicate " dogs " as 1 ) in the nautical context and 2 ) an action applied to the
|
||||
# NSg/VB . NPl/V3+ . NSg/R # . NPr/J/P D J N🅪Sg/VB+ VB/C # . D/P N🅪Sg/VB/J+ VP/J P D
|
||||
> object " hatch " ( in this context , " dogs " is a nautical term meaning " fastens ( a
|
||||
# NSg/VB+ . NSg/VB . . NPr/J/P I/Ddem N🅪Sg/VB+ . . NPl/V3+ . VL3 D/P J NSg/VB/J+ N🅪Sg/Vg/J+ . V3 . D/P
|
||||
> watertight door ) securely " ) .
|
||||
# J NSg/VB+ . R . . .
|
||||
>
|
||||
#
|
||||
> Tag sets
|
||||
# NSg/V+ NPl/V3
|
||||
> Tag sets
|
||||
# NSg/VB+ NPl/V3
|
||||
>
|
||||
#
|
||||
> Schools commonly teach that there are 9 parts of speech in English : noun , verb ,
|
||||
# NPl/V3+ R NSg/V NSg/I/C/Ddem + V # NPl/V3 P N🅪Sg/V NPr/J/P NPr🅪/V/J . NSg/V+ . NSg/V+ .
|
||||
> article , adjective , preposition , pronoun , adverb , conjunction , and interjection .
|
||||
# NSg/V+ . NSg/V/J+ . NSg/V . NSg/V+ . NSg/V+ . NSg/V+ . V/C N🅪Sg+ .
|
||||
> However , there are clearly many more categories and sub - categories . For nouns ,
|
||||
# C . + V R NSg/I/J/Dq+ NPr/I/V/J/R/Dq+ NPl+ V/C NSg/V/P . NPl+ . C/P NPl/V3 .
|
||||
> the plural , possessive , and singular forms can be distinguished . In many
|
||||
# D NSg/J . NSg/J . V/C NSg/J NPl/V3+ NPr/VX NSg/VXB VP/J . NPr/J/P NSg/I/J/Dq+
|
||||
> languages words are also marked for their " case " ( role as subject , object ,
|
||||
# NPl/V3+ NPl/V3+ V R/C VP/J C/P D$+ . NPr🅪/V+ . . NSg NSg/R NSg/V/J+ . NSg/V+ .
|
||||
> etc. ) , grammatical gender , and so on ; while verbs are marked for tense , aspect ,
|
||||
# + . . J+ N🅪Sg/V/J+ . V/C NSg/I/J/C J/P . NSg/V/C/P NPl/V3+ V VP/J C/P NSg/V/J . NSg/V+ .
|
||||
> and other things . In some tagging systems , different inflections of the same
|
||||
# V/C NSg/V/J+ NPl+ . NPr/J/P I/J/R/Dq NSg/V NPl+ . NSg/J NPl P D I/J
|
||||
> root word will get different parts of speech , resulting in a large number of
|
||||
# NPr/V+ NSg/V+ NPr/VX NSg/V NSg/J NPl/V3 P N🅪Sg/V+ . Nᴹ/Vg/J NPr/J/P D/P NSg/J N🅪Sg/V/JC P
|
||||
> tags . For example , NN for singular common nouns , NNS for plural common nouns , NP
|
||||
# NPl/V3+ . C/P NSg/V+ . ? C/P NSg/J NSg/V/J NPl/V3 . ? C/P NSg/J NSg/V/J NPl/V3 . NPr
|
||||
> for singular proper nouns ( see the POS tags used in the Brown Corpus ) . Other
|
||||
# C/P NSg/J NSg/J NPl/V3 . NSg/V D NSg+ NPl/V3+ VPPtPp/J NPr/J/P D NPr🅪/V/J NSg+ . . NSg/V/J
|
||||
> tagging systems use a smaller number of tags and ignore fine differences or
|
||||
# NSg/V NPl+ N🅪Sg/VB D/P NSg/JC N🅪Sg/V/JC P NPl/V3+ V/C V NSg/V/J NPl/V NPr/C
|
||||
> model them as features somewhat independent from part - of - speech .
|
||||
# NSg/V/J+ NSg/IPl+ NSg/R NPl/V3+ NSg/I/R NSg/J P NSg/V/J+ . P . N🅪Sg/V+ .
|
||||
> Schools commonly teach that there are 9 parts of speech in English : noun , verb ,
|
||||
# NPl/V3+ R NSg/VB NSg/I/C/Ddem + VB # NPl/V3 P N🅪Sg/VB NPr/J/P NPr🅪/VB/J . NSg/VB+ . NSg/VB+ .
|
||||
> article , adjective , preposition , pronoun , adverb , conjunction , and interjection .
|
||||
# NSg/VB+ . NSg/VB/J+ . NSg/VB . NSg/VB+ . NSg/VB+ . NSg/VB+ . VB/C N🅪Sg+ .
|
||||
> However , there are clearly many more categories and sub - categories . For nouns ,
|
||||
# C . + VB R NSg/I/J/Dq+ NPr/I/VB/J/R/Dq+ NPl+ VB/C NSg/VB/P . NPl+ . C/P NPl/V3 .
|
||||
> the plural , possessive , and singular forms can be distinguished . In many
|
||||
# D NSg/J . NSg/J . VB/C NSg/J NPl/V3+ NPr/VXB NSg/VXB VP/J . NPr/J/P NSg/I/J/Dq+
|
||||
> languages words are also marked for their " case " ( role as subject , object ,
|
||||
# NPl/V3+ NPl/V3+ VB R/C VP/J C/P D$+ . NPr🅪/VB+ . . NSg NSg/R NSg/VB/J+ . NSg/VB+ .
|
||||
> etc. ) , grammatical gender , and so on ; while verbs are marked for tense , aspect ,
|
||||
# + . . J+ N🅪Sg/VB/J+ . VB/C NSg/I/J/C J/P . NSg/VB/C/P NPl/V3+ VB VP/J C/P NSg/VB/J . NSg/VB+ .
|
||||
> and other things . In some tagging systems , different inflections of the same
|
||||
# VB/C NSg/VB/J+ NPl+ . NPr/J/P I/J/R/Dq NSg/Vg NPl+ . NSg/J NPl P D I/J
|
||||
> root word will get different parts of speech , resulting in a large number of
|
||||
# NPr/VB+ NSg/VB+ NPr/VXB NSg/VB NSg/J NPl/V3 P N🅪Sg/VB+ . Nᴹ/Vg/J NPr/J/P D/P NSg/J N🅪Sg/VB/JC P
|
||||
> tags . For example , NN for singular common nouns , NNS for plural common nouns , NP
|
||||
# NPl/V3+ . C/P NSg/VB+ . ? C/P NSg/J NSg/VB/J NPl/V3 . ? C/P NSg/J NSg/VB/J NPl/V3 . NPr
|
||||
> for singular proper nouns ( see the POS tags used in the Brown Corpus ) . Other
|
||||
# C/P NSg/J NSg/J NPl/V3 . NSg/VB D NSg+ NPl/V3+ VP/J NPr/J/P D NPr🅪/VB/J NSg+ . . NSg/VB/J
|
||||
> tagging systems use a smaller number of tags and ignore fine differences or
|
||||
# NSg/Vg NPl+ N🅪Sg/VB D/P NSg/JC N🅪Sg/VB/JC P NPl/V3+ VB/C VB NSg/VB/J NPl/VB NPr/C
|
||||
> model them as features somewhat independent from part - of - speech .
|
||||
# NSg/VB/J+ NSg/IPl+ NSg/R NPl/V3+ NSg/I/R NSg/J P NSg/VB/J+ . P . N🅪Sg/VB+ .
|
||||
>
|
||||
#
|
||||
> In part - of - speech tagging by computer , it is typical to distinguish from 50 to
|
||||
# NPr/J/P NSg/V/J+ . P . N🅪Sg/V+ NSg/V NSg/J/P NSg/V+ . NPr/ISg+ VL3 NSg/J P V P # P
|
||||
> 150 separate parts of speech for English . Work on stochastic methods for tagging
|
||||
# # NSg/V/J NPl/V3 P N🅪Sg/V C/P NPr🅪/V/J+ . N🅪Sg/V J/P J NPl/V3+ C/P NSg/V
|
||||
> Koine Greek ( DeRose 1990 ) has used over 1 , 000 parts of speech and found that
|
||||
# ? NPr/V/J . ? # . V3 VPPtPp/J NSg/J/P # . # NPl/V3 P N🅪Sg/V+ V/C NSg/V NSg/I/C/Ddem
|
||||
> about as many words were ambiguous in that language as in English . A
|
||||
# J/P NSg/R NSg/I/J/Dq NPl/V3+ NSg/VPt J NPr/J/P NSg/I/C/Ddem N🅪Sg/V+ NSg/R NPr/J/P NPr🅪/V/J+ . D/P
|
||||
> morphosyntactic descriptor in the case of morphologically rich languages is
|
||||
# ? NSg NPr/J/P D NPr🅪/V P ? NPr/V/J NPl/V3+ VL3
|
||||
> commonly expressed using very short mnemonics , such as Ncmsan for Category = Noun ,
|
||||
# R VP/J Nᴹ/Vg/J J/R NPr/V/J/P NPl . NSg/I NSg/R ? C/P NSg+ . NSg/V+ .
|
||||
> Type = common , Gender = masculine , Number = singular , Case = accusative , Animate
|
||||
# NSg/V+ . NSg/V/J . N🅪Sg/V/J+ . NSg/J . N🅪Sg/V/JC+ . NSg/J . NPr🅪/V+ . NSg/J . V/J
|
||||
> In part - of - speech tagging by computer , it is typical to distinguish from 50 to
|
||||
# NPr/J/P NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg NSg/J/P NSg/VB+ . NPr/ISg+ VL3 NSg/J P VB P # P
|
||||
> 150 separate parts of speech for English . Work on stochastic methods for tagging
|
||||
# # NSg/VB/J NPl/V3 P N🅪Sg/VB C/P NPr🅪/VB/J+ . N🅪Sg/VB J/P J NPl/V3+ C/P NSg/Vg
|
||||
> Koine Greek ( DeRose 1990 ) has used over 1 , 000 parts of speech and found that
|
||||
# ? NPr/VB/J . ? # . V3 VP/J NSg/J/P # . # NPl/V3 P N🅪Sg/VB+ VB/C NSg/VB NSg/I/C/Ddem
|
||||
> about as many words were ambiguous in that language as in English . A
|
||||
# J/P NSg/R NSg/I/J/Dq NPl/V3+ NSg/VPt J NPr/J/P NSg/I/C/Ddem N🅪Sg/VB+ NSg/R NPr/J/P NPr🅪/VB/J+ . D/P
|
||||
> morphosyntactic descriptor in the case of morphologically rich languages is
|
||||
# ? NSg NPr/J/P D NPr🅪/VB P ? NPr/VB/J NPl/V3+ VL3
|
||||
> commonly expressed using very short mnemonics , such as Ncmsan for Category = Noun ,
|
||||
# R VP/J Nᴹ/Vg/J J/R NPr/VB/J/P NPl . NSg/I NSg/R ? C/P NSg+ . NSg/VB+ .
|
||||
> Type = common , Gender = masculine , Number = singular , Case = accusative , Animate
|
||||
# NSg/VB+ . NSg/VB/J . N🅪Sg/VB/J+ . NSg/J . N🅪Sg/VB/JC+ . NSg/J . NPr🅪/VB+ . NSg/J . VB/J
|
||||
> = no .
|
||||
# . NPr/P .
|
||||
>
|
||||
#
|
||||
> The most popular " tag set " for POS tagging for American English is probably the
|
||||
# D NSg/I/J/R/Dq NSg/J . NSg/V NPr/V/J . C/P NSg+ NSg/V C/P NPr/J NPr🅪/V/J+ VL3 R D
|
||||
> Penn tag set , developed in the Penn Treebank project . It is largely similar to
|
||||
# NPr+ NSg/V+ NPr/V/J . VP/J NPr/J/P D NPr+ ? NSg/V+ . NPr/ISg+ VL3 R NSg/J P
|
||||
> the earlier Brown Corpus and LOB Corpus tag sets , though much smaller . In
|
||||
# D JC NPr🅪/V/J NSg V/C NSg/V NSg+ NSg/V+ NPl/V3 . V/C NSg/I/J/R/Dq NSg/JC . NPr/J/P
|
||||
> Europe , tag sets from the Eagles Guidelines see wide use and include versions
|
||||
# NPr+ . NSg/V+ NPl/V3 P D NPl/V3 NPl+ NSg/V NSg/J N🅪Sg/VB+ V/C NSg/V NPl/V3+
|
||||
> The most popular " tag set " for POS tagging for American English is probably the
|
||||
# D NSg/I/J/R/Dq NSg/J . NSg/VB NPr/VB/J . C/P NSg+ NSg/Vg C/P NPr/J NPr🅪/VB/J+ VL3 R D
|
||||
> Penn tag set , developed in the Penn Treebank project . It is largely similar to
|
||||
# NPr+ NSg/VB+ NPr/VB/J . VP/J NPr/J/P D NPr+ ? NSg/VB+ . NPr/ISg+ VL3 R NSg/J P
|
||||
> the earlier Brown Corpus and LOB Corpus tag sets , though much smaller . In
|
||||
# D JC NPr🅪/VB/J NSg VB/C NSg/VB NSg+ NSg/VB+ NPl/V3 . VB/C NSg/I/J/R/Dq NSg/JC . NPr/J/P
|
||||
> Europe , tag sets from the Eagles Guidelines see wide use and include versions
|
||||
# NPr+ . NSg/VB+ NPl/V3 P D NPl/V3 NPl+ NSg/VB NSg/J N🅪Sg/VB+ VB/C NSg/VB NPl/V3+
|
||||
> for multiple languages .
|
||||
# C/P NSg/J/Dq NPl/V3+ .
|
||||
>
|
||||
#
|
||||
> POS tagging work has been done in a variety of languages , and the set of POS
|
||||
# NSg+ NSg/V N🅪Sg/V+ V3 NSg/VPp NSg/VPp/J NPr/J/P D/P NSg P NPl/V3+ . V/C D NPr/V/J P NSg+
|
||||
> tags used varies greatly with language . Tags usually are designed to include
|
||||
# NPl/V3+ VPPtPp/J NPl/V3 R P N🅪Sg/V+ . NPl/V3+ R V VP/J P NSg/V
|
||||
> POS tagging work has been done in a variety of languages , and the set of POS
|
||||
# NSg+ NSg/Vg N🅪Sg/VB+ V3 NSg/VPp NSg/VPp/J NPr/J/P D/P NSg P NPl/V3+ . VB/C D NPr/VB/J P NSg+
|
||||
> tags used varies greatly with language . Tags usually are designed to include
|
||||
# NPl/V3+ VP/J NPl/V3 R P N🅪Sg/VB+ . NPl/V3+ R VB VP/J P NSg/VB
|
||||
> overt morphological distinctions , although this leads to inconsistencies such as
|
||||
# NSg/J+ J+ NPl+ . C I/Ddem NPl/V3 P NPl NSg/I NSg/R
|
||||
> case - marking for pronouns but not nouns in English , and much larger
|
||||
# NPr🅪/V+ . Nᴹ/Vg/J C/P NPl/V3 NSg/C/P NSg/C NPl/V3 NPr/J/P NPr🅪/V/J+ . V/C NSg/I/J/R/Dq JC
|
||||
> cross - language differences . The tag sets for heavily inflected languages such as
|
||||
# NPr/V/J/P+ . N🅪Sg/V+ NPl/V+ . D+ NSg/V+ NPl/V3 C/P R VP/J NPl/V3+ NSg/I NSg/R
|
||||
> Greek and Latin can be very large ; tagging words in agglutinative languages such
|
||||
# NPr/V/J V/C NPr/J NPr/VX NSg/VXB J/R NSg/J . NSg/V NPl/V3+ NPr/J/P ? NPl/V3+ NSg/I
|
||||
> as Inuit languages may be virtually impossible . At the other extreme , Petrov et
|
||||
# NSg/R NPr/J NPl/V3+ NPr/VX NSg/VXB R NSg/J . NSg/P D NSg/V/J NSg/J . ? ?
|
||||
> al. have proposed a " universal " tag set , with 12 categories ( for example , no
|
||||
# ? NSg/VX VP/J D/P . NSg/J . NSg/V+ NPr/V/J . P # NPl+ . C/P NSg/V+ . NPr/P
|
||||
> subtypes of nouns , verbs , punctuation , and so on ) . Whether a very small set of
|
||||
# NPl P NPl/V3 . NPl/V3+ . Nᴹ+ . V/C NSg/I/J/C J/P . . I/C D/P J/R NPr/V/J NPr/V/J P
|
||||
> very broad tags or a much larger set of more precise ones is preferable , depends
|
||||
# J/R NSg/J NPl/V3+ NPr/C D/P NSg/I/J/R/Dq JC NPr/V/J P NPr/I/V/J/R/Dq V/J+ NPl/V3+ VL3 W? . NPl/V3
|
||||
> on the purpose at hand . Automatic tagging is easier on smaller tag - sets .
|
||||
# J/P D N🅪Sg/V+ NSg/P NSg/V+ . NSg/J NSg/V VL3 NSg/JC J/P NSg/JC NSg/V+ . NPl/V3 .
|
||||
> case - marking for pronouns but not nouns in English , and much larger
|
||||
# NPr🅪/VB+ . Nᴹ/Vg/J C/P NPl/V3 NSg/C/P NSg/C NPl/V3 NPr/J/P NPr🅪/VB/J+ . VB/C NSg/I/J/R/Dq JC
|
||||
> cross - language differences . The tag sets for heavily inflected languages such as
|
||||
# NPr/VB/J/P+ . N🅪Sg/VB+ NPl/VB+ . D+ NSg/VB+ NPl/V3 C/P R VP/J NPl/V3+ NSg/I NSg/R
|
||||
> Greek and Latin can be very large ; tagging words in agglutinative languages such
|
||||
# NPr/VB/J VB/C NPr/J NPr/VXB NSg/VXB J/R NSg/J . NSg/Vg NPl/V3+ NPr/J/P ? NPl/V3+ NSg/I
|
||||
> as Inuit languages may be virtually impossible . At the other extreme , Petrov et
|
||||
# NSg/R NPr/J NPl/V3+ NPr/VXB NSg/VXB R NSg/J . NSg/P D NSg/VB/J NSg/J . ? ?
|
||||
> al. have proposed a " universal " tag set , with 12 categories ( for example , no
|
||||
# ? NSg/VXB VP/J D/P . NSg/J . NSg/VB+ NPr/VB/J . P # NPl+ . C/P NSg/VB+ . NPr/P
|
||||
> subtypes of nouns , verbs , punctuation , and so on ) . Whether a very small set of
|
||||
# NPl P NPl/V3 . NPl/V3+ . Nᴹ+ . VB/C NSg/I/J/C J/P . . I/C D/P J/R NPr/VB/J NPr/VB/J P
|
||||
> very broad tags or a much larger set of more precise ones is preferable , depends
|
||||
# J/R NSg/J NPl/V3+ NPr/C D/P NSg/I/J/R/Dq JC NPr/VB/J P NPr/I/VB/J/R/Dq VB/J+ NPl/V3+ VL3 W? . NPl/V3
|
||||
> on the purpose at hand . Automatic tagging is easier on smaller tag - sets .
|
||||
# J/P D N🅪Sg/VB+ NSg/P NSg/VB+ . NSg/J NSg/Vg VL3 NSg/JC J/P NSg/JC NSg/VB+ . NPl/V3 .
|
||||
>
|
||||
#
|
||||
> History
|
||||
# N🅪Sg+
|
||||
>
|
||||
#
|
||||
> The Brown Corpus
|
||||
# D+ NPr🅪/V/J+ NSg+
|
||||
> The Brown Corpus
|
||||
# D+ NPr🅪/VB/J+ NSg+
|
||||
>
|
||||
#
|
||||
> Research on part - of - speech tagging has been closely tied to corpus linguistics .
|
||||
# Nᴹ/V J/P NSg/V/J+ . P . N🅪Sg/V+ NSg/V V3 NSg/VPp R VP/J P NSg Nᴹ+ .
|
||||
> The first major corpus of English for computer analysis was the Brown Corpus
|
||||
# D NSg/V/J NPr/V/J NSg P NPr🅪/V/J C/P NSg/V+ N🅪Sg+ VPt D NPr🅪/V/J NSg
|
||||
> developed at Brown University by Henry Kučera and W. Nelson Francis , in the
|
||||
# VP/J NSg/P NPr🅪/V/J NSg+ NSg/J/P NPr+ ? V/C ? NPr+ NPr+ . NPr/J/P D
|
||||
> mid - 1960s . It consists of about 1 , 000 , 000 words of running English prose text ,
|
||||
# NSg/J/P+ . #d . NPr/ISg+ NPl/V3 P J/P # . # . # NPl/V3 P Nᴹ/V/J/P NPr🅪/V/J+ Nᴹ/V N🅪Sg/V+ .
|
||||
> made up of 500 samples from randomly chosen publications . Each sample is 2 , 000
|
||||
# V NSg/V/J/P P # NPl/V3+ P R Nᴹ/V/J NPl+ . Dq+ NSg/V+ VL3 # . #
|
||||
> or more words ( ending at the first sentence - end after 2 , 000 words , so that the
|
||||
# NPr/C NPr/I/V/J/R/Dq NPl/V3+ . Nᴹ/Vg/J NSg/P D NSg/V/J NSg/V+ . NSg/V+ P # . # NPl/V3+ . NSg/I/J/C NSg/I/C/Ddem D
|
||||
> Research on part - of - speech tagging has been closely tied to corpus linguistics .
|
||||
# Nᴹ/VB J/P NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg V3 NSg/VPp R VP/J P NSg Nᴹ+ .
|
||||
> The first major corpus of English for computer analysis was the Brown Corpus
|
||||
# D NSg/VB/J NPr/VB/J NSg P NPr🅪/VB/J C/P NSg/VB+ N🅪Sg+ VPt D NPr🅪/VB/J NSg
|
||||
> developed at Brown University by Henry Kučera and W. Nelson Francis , in the
|
||||
# VP/J NSg/P NPr🅪/VB/J NSg+ NSg/J/P NPr+ ? VB/C ? NPr+ NPr+ . NPr/J/P D
|
||||
> mid - 1960s . It consists of about 1 , 000 , 000 words of running English prose text ,
|
||||
# NSg/J/P+ . #d . NPr/ISg+ NPl/V3 P J/P # . # . # NPl/V3 P Nᴹ/Vg/J/P NPr🅪/VB/J+ Nᴹ/VB N🅪Sg/VB+ .
|
||||
> made up of 500 samples from randomly chosen publications . Each sample is 2 , 000
|
||||
# VB NSg/VB/J/P P # NPl/V3+ P R Nᴹ/VPp/J NPl+ . Dq+ NSg/VB+ VL3 # . #
|
||||
> or more words ( ending at the first sentence - end after 2 , 000 words , so that the
|
||||
# NPr/C NPr/I/VB/J/R/Dq NPl/V3+ . Nᴹ/Vg/J NSg/P D NSg/VB/J NSg/VB+ . NSg/VB+ P # . # NPl/V3+ . NSg/I/J/C NSg/I/C/Ddem D
|
||||
> corpus contains only complete sentences ) .
|
||||
# NSg+ V3 J/R/C NSg/V/J NPl/V3+ . .
|
||||
# NSg+ V3 J/R/C NSg/VB/J NPl/V3+ . .
|
||||
>
|
||||
#
|
||||
> The Brown Corpus was painstakingly " tagged " with part - of - speech markers over
|
||||
# D+ NPr🅪/V/J+ NSg+ VPt R . V/J . P NSg/V/J+ . P . N🅪Sg/V+ NPl/V3 NSg/J/P
|
||||
> many years . A first approximation was done with a program by Greene and Rubin ,
|
||||
# NSg/I/J/Dq+ NPl+ . D/P+ NSg/V/J+ N🅪Sg+ VPt NSg/VPp/J P D/P+ NPr/V+ NSg/J/P NPr V/C NPr .
|
||||
> which consisted of a huge handmade list of what categories could co - occur at
|
||||
# I/C+ VP/J P D/P J NSg/J NSg/V P NSg/I+ NPl+ NSg/VX NPr/I/V+ . V NSg/P
|
||||
> all . For example , article then noun can occur , but article then verb ( arguably )
|
||||
# NSg/I/J/C/Dq . C/P NSg/V+ . NSg/V+ NSg/J/C NSg/V+ NPr/VX V . NSg/C/P NSg/V+ NSg/J/C NSg/V+ . R .
|
||||
> cannot . The program got about 70 % correct . Its results were repeatedly reviewed
|
||||
# NSg/V . D+ NPr/V+ V J/P # . NSg/V/J . ISg/D$+ NPl/V3+ NSg/VPt R VP/J
|
||||
> and corrected by hand , and later users sent in errata so that by the late 70 s
|
||||
# V/C VP/J NSg/J/P NSg/V+ . V/C JC NPl+ NSg/V NPr/J/P NSg NSg/I/J/C NSg/I/C/Ddem+ NSg/J/P D NSg/J # ?
|
||||
> the tagging was nearly perfect ( allowing for some cases on which even human
|
||||
# D NSg/V VPt R NSg/V/J . Nᴹ/Vg/J C/P I/J/R/Dq NPl/V3+ J/P I/C+ NSg/V/J NSg/V/J+
|
||||
> speakers might not agree ) .
|
||||
# + Nᴹ/VX/J NSg/C V . .
|
||||
> The Brown Corpus was painstakingly " tagged " with part - of - speech markers over
|
||||
# D+ NPr🅪/VB/J+ NSg+ VPt R . VP/J . P NSg/VB/J+ . P . N🅪Sg/VB+ NPl/V3 NSg/J/P
|
||||
> many years . A first approximation was done with a program by Greene and Rubin ,
|
||||
# NSg/I/J/Dq+ NPl+ . D/P+ NSg/VB/J+ N🅪Sg+ VPt NSg/VPp/J P D/P+ NPr/VB+ NSg/J/P NPr VB/C NPr .
|
||||
> which consisted of a huge handmade list of what categories could co - occur at
|
||||
# I/C+ VP/J P D/P J NSg/J NSg/VB P NSg/I+ NPl+ NSg/VXB NPr/I/VB+ . VB NSg/P
|
||||
> all . For example , article then noun can occur , but article then verb ( arguably )
|
||||
# NSg/I/J/C/Dq . C/P NSg/VB+ . NSg/VB+ NSg/J/C NSg/VB+ NPr/VXB VB . NSg/C/P NSg/VB+ NSg/J/C NSg/VB+ . R .
|
||||
> cannot . The program got about 70 % correct . Its results were repeatedly reviewed
|
||||
# NSg/VB . D+ NPr/VB+ VB J/P # . NSg/VB/J . ISg/D$+ NPl/V3+ NSg/VPt R VP/J
|
||||
> and corrected by hand , and later users sent in errata so that by the late 70 s
|
||||
# VB/C VP/J NSg/J/P NSg/VB+ . VB/C JC NPl+ NSg/VB NPr/J/P NSg NSg/I/J/C NSg/I/C/Ddem+ NSg/J/P D NSg/J # ?
|
||||
> the tagging was nearly perfect ( allowing for some cases on which even human
|
||||
# D NSg/Vg VPt R NSg/VB/J . Nᴹ/Vg/J C/P I/J/R/Dq NPl/V3+ J/P I/C+ NSg/VB/J NSg/VB/J+
|
||||
> speakers might not agree ) .
|
||||
# + Nᴹ/VXB/J NSg/C VB . .
|
||||
>
|
||||
#
|
||||
> This corpus has been used for innumerable studies of word - frequency and of
|
||||
# I/Ddem+ NSg+ V3 NSg/VPp VPPtPp/J C/P J NPl/V3 P NSg/V+ . NSg V/C P
|
||||
> part - of - speech and inspired the development of similar " tagged " corpora in many
|
||||
# NSg/V/J+ . P . N🅪Sg/V+ V/C VP/J D N🅪Sg P NSg/J . V/J . NPl+ NPr/J/P NSg/I/J/Dq
|
||||
> other languages . Statistics derived by analyzing it formed the basis for most
|
||||
# NSg/V/J NPl/V3+ . NPl/V3+ VP/J NSg/J/P Nᴹ/Vg/J NPr/ISg+ VP/J D+ NSg+ C/P NSg/I/J/R/Dq
|
||||
> later part - of - speech tagging systems , such as CLAWS and VOLSUNGA . However , by
|
||||
# JC NSg/V/J+ . P . N🅪Sg/V+ NSg/V NPl+ . NSg/I NSg/R NPl/V3+ V/C ? . C . NSg/J/P
|
||||
> this time ( 2005 ) it has been superseded by larger corpora such as the 100
|
||||
# I/Ddem+ N🅪Sg/V/J+ . # . NPr/ISg+ V3 NSg/VPp VP/J NSg/J/P JC NPl+ NSg/I NSg/R D #
|
||||
> million word British National Corpus , even though larger corpora are rarely so
|
||||
# NSg NSg/V+ NPr/J NSg/J NSg+ . NSg/V/J V/C JC NPl+ V R NSg/I/J/C
|
||||
> This corpus has been used for innumerable studies of word - frequency and of
|
||||
# I/Ddem+ NSg+ V3 NSg/VPp VP/J C/P J NPl/V3 P NSg/VB+ . NSg VB/C P
|
||||
> part - of - speech and inspired the development of similar " tagged " corpora in many
|
||||
# NSg/VB/J+ . P . N🅪Sg/VB+ VB/C VP/J D N🅪Sg P NSg/J . VP/J . NPl+ NPr/J/P NSg/I/J/Dq
|
||||
> other languages . Statistics derived by analyzing it formed the basis for most
|
||||
# NSg/VB/J NPl/V3+ . NPl/V3+ VP/J NSg/J/P Nᴹ/Vg/J NPr/ISg+ VP/J D+ NSg+ C/P NSg/I/J/R/Dq
|
||||
> later part - of - speech tagging systems , such as CLAWS and VOLSUNGA . However , by
|
||||
# JC NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg NPl+ . NSg/I NSg/R NPl/V3+ VB/C ? . C . NSg/J/P
|
||||
> this time ( 2005 ) it has been superseded by larger corpora such as the 100
|
||||
# I/Ddem+ N🅪Sg/VB/J+ . # . NPr/ISg+ V3 NSg/VPp VP/J NSg/J/P JC NPl+ NSg/I NSg/R D #
|
||||
> million word British National Corpus , even though larger corpora are rarely so
|
||||
# NSg NSg/VB+ NPr/J NSg/J NSg+ . NSg/VB/J VB/C JC NPl+ VB R NSg/I/J/C
|
||||
> thoroughly curated .
|
||||
# R VP/J .
|
||||
>
|
||||
#
|
||||
> For some time , part - of - speech tagging was considered an inseparable part of
|
||||
# C/P I/J/R/Dq N🅪Sg/V/J+ . NSg/V/J+ . P . N🅪Sg/V+ NSg/V VPt VP/J D/P NSg/J NSg/V/J P
|
||||
> For some time , part - of - speech tagging was considered an inseparable part of
|
||||
# C/P I/J/R/Dq N🅪Sg/VB/J+ . NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg VPt VP/J D/P NSg/J NSg/VB/J P
|
||||
> natural language processing , because there are certain cases where the correct
|
||||
# NSg/J N🅪Sg/V+ Nᴹ/Vg/J+ . C/P + V I/J NPl/V3+ NSg/C D NSg/V/J
|
||||
> part of speech cannot be decided without understanding the semantics or even the
|
||||
# NSg/V/J P N🅪Sg/V+ NSg/V NSg/VXB NSg/VP/J C/P N🅪Sg/Vg/J+ D NPl+ NPr/C NSg/V/J D
|
||||
> pragmatics of the context . This is extremely expensive , especially because
|
||||
# NPl P D N🅪Sg/V+ . I/Ddem+ VL3 R J . R C/P
|
||||
> analyzing the higher levels is much harder when multiple part - of - speech
|
||||
# Nᴹ/Vg/J D+ NSg/JC+ NPl/V3+ VL3 NSg/I/J/R/Dq JC NSg/I/C NSg/J/Dq NSg/V/J . P . N🅪Sg/V+
|
||||
> possibilities must be considered for each word .
|
||||
# NPl+ NSg/V NSg/VXB VP/J C/P Dq+ NSg/V+ .
|
||||
# NSg/J N🅪Sg/VB+ Nᴹ/Vg/J+ . C/P + VB I/J NPl/V3+ NSg/C D NSg/VB/J
|
||||
> part of speech cannot be decided without understanding the semantics or even the
|
||||
# NSg/VB/J P N🅪Sg/VB+ NSg/VB NSg/VXB NSg/VP/J C/P N🅪Sg/Vg/J+ D NPl+ NPr/C NSg/VB/J D
|
||||
> pragmatics of the context . This is extremely expensive , especially because
|
||||
# NPl P D N🅪Sg/VB+ . I/Ddem+ VL3 R J . R C/P
|
||||
> analyzing the higher levels is much harder when multiple part - of - speech
|
||||
# Nᴹ/Vg/J D+ NSg/JC+ NPl/V3+ VL3 NSg/I/J/R/Dq JC NSg/I/C NSg/J/Dq NSg/VB/J . P . N🅪Sg/VB+
|
||||
> possibilities must be considered for each word .
|
||||
# NPl+ NSg/VB NSg/VXB VP/J C/P Dq+ NSg/VB+ .
|
||||
>
|
||||
#
|
||||
> Use of hidden Markov models
|
||||
# N🅪Sg/VB P V/J NPr NPl/V3+
|
||||
# N🅪Sg/VB P VB/J NPr NPl/V3+
|
||||
>
|
||||
#
|
||||
> In the mid - 1980s , researchers in Europe began to use hidden Markov models ( HMMs )
|
||||
# NPr/J/P D NSg/J/P+ . #d . NPl NPr/J/P NPr+ VPt P N🅪Sg/VB V/J NPr NPl/V3+ . ? .
|
||||
> to disambiguate parts of speech , when working to tag the Lancaster - Oslo - Bergen
|
||||
# P V NPl/V3 P N🅪Sg/V+ . NSg/I/C Nᴹ/Vg/J P NSg/V D NPr . NPr+ . NPr+
|
||||
> Corpus of British English . HMMs involve counting cases ( such as from the Brown
|
||||
# NSg P NPr/J NPr🅪/V/J+ . ? V Nᴹ/Vg/J NPl/V3+ . NSg/I NSg/R P D NPr🅪/V/J
|
||||
> Corpus ) and making a table of the probabilities of certain sequences . For
|
||||
# NSg+ . V/C Nᴹ/Vg/J D/P NSg/V P D NPl P I/J NPl/V3+ . C/P
|
||||
> example , once you've seen an article such as ' the ' , perhaps the next word is a
|
||||
# NSg/V+ . NSg/C W? NSg/VPp D/P NSg/V+ NSg/I NSg/R . D . . NSg/R D NSg/J/P NSg/V+ VL3 D/P
|
||||
> noun 40 % of the time , an adjective 40 % , and a number 20 % . Knowing this , a
|
||||
# NSg/V+ # . P D N🅪Sg/V/J+ . D/P NSg/V/J+ # . . V/C D/P N🅪Sg/V/JC+ # . . NSg/V/J/P I/Ddem+ . D/P+
|
||||
> program can decide that " can " in " the can " is far more likely to be a noun than
|
||||
# NPr/V+ NPr/VX V NSg/I/C/Ddem+ . NPr/VX . NPr/J/P . D+ NPr/VX . VL3 NSg/V/J NPr/I/V/J/R/Dq NSg/J P NSg/VXB D/P NSg/V C/P
|
||||
> a verb or a modal . The same method can , of course , be used to benefit from
|
||||
# D/P+ NSg/V+ NPr/C D/P NSg/J . D+ I/J+ NSg/V+ NPr/VX . P NSg/V+ . NSg/VXB VPPtPp/J P NSg/V P
|
||||
# NPr/J/P D NSg/J/P+ . #d . NPl NPr/J/P NPr+ VPt P N🅪Sg/VB VB/J NPr NPl/V3+ . ? .
|
||||
> to disambiguate parts of speech , when working to tag the Lancaster - Oslo - Bergen
|
||||
# P VB NPl/V3 P N🅪Sg/VB+ . NSg/I/C Nᴹ/Vg/J P NSg/VB D NPr . NPr+ . NPr+
|
||||
> Corpus of British English . HMMs involve counting cases ( such as from the Brown
|
||||
# NSg P NPr/J NPr🅪/VB/J+ . ? VB Nᴹ/Vg/J NPl/V3+ . NSg/I NSg/R P D NPr🅪/VB/J
|
||||
> Corpus ) and making a table of the probabilities of certain sequences . For
|
||||
# NSg+ . VB/C Nᴹ/Vg/J D/P NSg/VB P D NPl P I/J NPl/V3+ . C/P
|
||||
> example , once you've seen an article such as ' the ' , perhaps the next word is a
|
||||
# NSg/VB+ . NSg/C W? NSg/VPp D/P NSg/VB+ NSg/I NSg/R . D . . NSg/R D NSg/J/P NSg/VB+ VL3 D/P
|
||||
> noun 40 % of the time , an adjective 40 % , and a number 20 % . Knowing this , a
|
||||
# NSg/VB+ # . P D N🅪Sg/VB/J+ . D/P NSg/VB/J+ # . . VB/C D/P N🅪Sg/VB/JC+ # . . NSg/VB/J/P I/Ddem+ . D/P+
|
||||
> program can decide that " can " in " the can " is far more likely to be a noun than
|
||||
# NPr/VB+ NPr/VXB VB NSg/I/C/Ddem+ . NPr/VXB . NPr/J/P . D+ NPr/VXB . VL3 NSg/VB/J NPr/I/VB/J/R/Dq NSg/J P NSg/VXB D/P NSg/VB C/P
|
||||
> a verb or a modal . The same method can , of course , be used to benefit from
|
||||
# D/P+ NSg/VB+ NPr/C D/P NSg/J . D+ I/J+ NSg/VB+ NPr/VXB . P NSg/VB+ . NSg/VXB VP/J P NSg/VB P
|
||||
> knowledge about the following words .
|
||||
# Nᴹ+ J/P D+ Nᴹ/Vg/J/P NPl/V3+ .
|
||||
>
|
||||
#
|
||||
> More advanced ( " higher - order " ) HMMs learn the probabilities not only of pairs
|
||||
# NPr/I/V/J/R/Dq VP/J . . NSg/JC . NSg/V . . ? NSg/V D NPl+ NSg/C J/R/C P NPl/V3+
|
||||
> but triples or even larger sequences . So , for example , if you've just seen a
|
||||
# NSg/C/P NPl/V3 NPr/C NSg/V/J JC NPl/V3+ . NSg/I/J/C . C/P NSg/V+ . NSg/C W? V/J NSg/VPp D/P
|
||||
> noun followed by a verb , the next item may be very likely a preposition ,
|
||||
# NSg/V+ VP/J NSg/J/P D/P NSg/V+ . D NSg/J/P NSg/V+ NPr/VX NSg/VXB J/R NSg/J D/P NSg/V .
|
||||
> article , or noun , but much less likely another verb .
|
||||
# NSg/V+ . NPr/C NSg/V+ . NSg/C/P NSg/I/J/R/Dq V/J/R/C/P NSg/J I/D NSg/V+ .
|
||||
> More advanced ( " higher - order " ) HMMs learn the probabilities not only of pairs
|
||||
# NPr/I/VB/J/R/Dq VP/J . . NSg/JC . NSg/VB . . ? NSg/VB D NPl+ NSg/C J/R/C P NPl/V3+
|
||||
> but triples or even larger sequences . So , for example , if you've just seen a
|
||||
# NSg/C/P NPl/V3 NPr/C NSg/VB/J JC NPl/V3+ . NSg/I/J/C . C/P NSg/VB+ . NSg/C W? VB/J NSg/VPp D/P
|
||||
> noun followed by a verb , the next item may be very likely a preposition ,
|
||||
# NSg/VB+ VP/J NSg/J/P D/P NSg/VB+ . D NSg/J/P NSg/VB+ NPr/VXB NSg/VXB J/R NSg/J D/P NSg/VB .
|
||||
> article , or noun , but much less likely another verb .
|
||||
# NSg/VB+ . NPr/C NSg/VB+ . NSg/C/P NSg/I/J/R/Dq VB/J/R/C/P NSg/J I/D NSg/VB+ .
|
||||
>
|
||||
#
|
||||
> When several ambiguous words occur together , the possibilities multiply .
|
||||
# NSg/I/C J/Dq+ J+ NPl/V3+ V J . D+ NPl+ NSg/V .
|
||||
> However , it is easy to enumerate every combination and to assign a relative
|
||||
# C . NPr/ISg+ VL3 NSg/V/J P V Dq+ N🅪Sg+ V/C P NSg/V D/P NSg/J
|
||||
> probability to each one , by multiplying together the probabilities of each
|
||||
# NSg+ P Dq NSg/I/V/J+ . NSg/J/P Nᴹ/Vg/J J D NPl P Dq
|
||||
> choice in turn . The combination with the highest probability is then chosen . The
|
||||
# N🅪Sg/J+ NPr/J/P NSg/V . D N🅪Sg P D+ JS+ NSg+ VL3 NSg/J/C Nᴹ/V/J . D+
|
||||
> European group developed CLAWS , a tagging program that did exactly this and
|
||||
# NSg/J+ NSg/V+ VP/J NPl/V3+ . D/P NSg/V NPr/V+ NSg/I/C/Ddem+ VPt R I/Ddem V/C
|
||||
> achieved accuracy in the 93 – 95 % range .
|
||||
# VP/J N🅪Sg+ NPr/J/P D # . # . N🅪Sg/V+ .
|
||||
# NSg/I/C J/Dq+ J+ NPl/V3+ VB J . D+ NPl+ NSg/VB .
|
||||
> However , it is easy to enumerate every combination and to assign a relative
|
||||
# C . NPr/ISg+ VL3 NSg/VB/J P VB Dq+ N🅪Sg+ VB/C P NSg/VB D/P NSg/J
|
||||
> probability to each one , by multiplying together the probabilities of each
|
||||
# NSg+ P Dq NSg/I/VB/J+ . NSg/J/P Nᴹ/Vg/J J D NPl P Dq
|
||||
> choice in turn . The combination with the highest probability is then chosen . The
|
||||
# N🅪Sg/J+ NPr/J/P NSg/VB . D N🅪Sg P D+ JS+ NSg+ VL3 NSg/J/C Nᴹ/VPp/J . D+
|
||||
> European group developed CLAWS , a tagging program that did exactly this and
|
||||
# NSg/J+ NSg/VB+ VP/J NPl/V3+ . D/P NSg/Vg NPr/VB+ NSg/I/C/Ddem+ VPt R I/Ddem VB/C
|
||||
> achieved accuracy in the 93 – 95 % range .
|
||||
# VP/J N🅪Sg+ NPr/J/P D # . # . N🅪Sg/VB+ .
|
||||
>
|
||||
#
|
||||
> Eugene Charniak points out in Statistical techniques for natural language
|
||||
# NPr+ ? NPl/V3+ NSg/V/J/R/P NPr/J/P J NPl C/P NSg/J+ N🅪Sg/V+
|
||||
> parsing ( 1997 ) that merely assigning the most common tag to each known word and
|
||||
# Nᴹ/Vg/J . # . NSg/I/C/Ddem+ R Nᴹ/Vg/J D NSg/I/J/R/Dq NSg/V/J NSg/V+ P Dq VPp/J NSg/V+ V/C
|
||||
> the tag " proper noun " to all unknowns will approach 90 % accuracy because many
|
||||
# D NSg/V+ . NSg/J NSg/V+ . P NSg/I/J/C/Dq NPl/V3+ NPr/VX NSg/V+ # . N🅪Sg+ C/P NSg/I/J/Dq
|
||||
> words are unambiguous , and many others only rarely represent their less - common
|
||||
# NPl/V3+ V J . V/C NSg/I/J/Dq NPl/V3+ J/R/C R V D$+ V/J/R/C/P . NSg/V/J
|
||||
> parts of speech .
|
||||
# NPl/V3 P N🅪Sg/V+ .
|
||||
> Eugene Charniak points out in Statistical techniques for natural language
|
||||
# NPr+ ? NPl/V3+ NSg/VB/J/R/P NPr/J/P J NPl C/P NSg/J+ N🅪Sg/VB+
|
||||
> parsing ( 1997 ) that merely assigning the most common tag to each known word and
|
||||
# Nᴹ/Vg/J . # . NSg/I/C/Ddem+ R Nᴹ/Vg/J D NSg/I/J/R/Dq NSg/VB/J NSg/VB+ P Dq VPp/J NSg/VB+ VB/C
|
||||
> the tag " proper noun " to all unknowns will approach 90 % accuracy because many
|
||||
# D NSg/VB+ . NSg/J NSg/VB+ . P NSg/I/J/C/Dq NPl/V3+ NPr/VXB NSg/VB+ # . N🅪Sg+ C/P NSg/I/J/Dq
|
||||
> words are unambiguous , and many others only rarely represent their less - common
|
||||
# NPl/V3+ VB J . VB/C NSg/I/J/Dq NPl/V3+ J/R/C R VB D$+ VB/J/R/C/P . NSg/VB/J
|
||||
> parts of speech .
|
||||
# NPl/V3 P N🅪Sg/VB+ .
|
||||
>
|
||||
#
|
||||
> CLAWS pioneered the field of HMM - based part of speech tagging but was quite
|
||||
# NPl/V3+ VP/J D NSg/V P V . VP/J NSg/V/J P N🅪Sg/V+ NSg/V NSg/C/P VPt R
|
||||
> CLAWS pioneered the field of HMM - based part of speech tagging but was quite
|
||||
# NPl/V3+ VP/J D NSg/VB P VB . VP/J NSg/VB/J P N🅪Sg/VB+ NSg/Vg NSg/C/P VPt R
|
||||
> expensive since it enumerated all possibilities . It sometimes had to resort to
|
||||
# J C/P NPr/ISg+ VP/J NSg/I/J/C/Dq NPl+ . NPr/ISg+ R V P NSg/V P
|
||||
> backup methods when there were simply too many options ( the Brown Corpus
|
||||
# NSg/J NPl/V3+ NSg/I/C + NSg/VPt R W? NSg/I/J/Dq NPl/V3 . D+ NPr🅪/V/J+ NSg+
|
||||
> contains a case with 17 ambiguous words in a row , and there are words such as
|
||||
# V3 D/P NPr🅪/V+ P # J NPl/V3 NPr/J/P D/P+ NSg/V+ . V/C + V NPl/V3+ NSg/I NSg/R
|
||||
> " still " that can represent as many as 7 distinct parts of speech .
|
||||
# . NSg/V/J . NSg/I/C/Ddem+ NPr/VX V NSg/R NSg/I/J/Dq NSg/R # V/J NPl/V3 P N🅪Sg/V+ .
|
||||
# J C/P NPr/ISg+ VP/J NSg/I/J/C/Dq NPl+ . NPr/ISg+ R VB P NSg/VB P
|
||||
> backup methods when there were simply too many options ( the Brown Corpus
|
||||
# NSg/J NPl/V3+ NSg/I/C + NSg/VPt R W? NSg/I/J/Dq NPl/V3 . D+ NPr🅪/VB/J+ NSg+
|
||||
> contains a case with 17 ambiguous words in a row , and there are words such as
|
||||
# V3 D/P NPr🅪/VB+ P # J NPl/V3 NPr/J/P D/P+ NSg/VB+ . VB/C + VB NPl/V3+ NSg/I NSg/R
|
||||
> " still " that can represent as many as 7 distinct parts of speech .
|
||||
# . NSg/VB/J . NSg/I/C/Ddem+ NPr/VXB VB NSg/R NSg/I/J/Dq NSg/R # VB/J NPl/V3 P N🅪Sg/VB+ .
|
||||
>
|
||||
#
|
||||
> HMMs underlie the functioning of stochastic taggers and are used in various
|
||||
# ? V D Nᴹ/Vg/J+ P J NPl V/C V VPPtPp/J NPr/J/P J
|
||||
> algorithms one of the most widely used being the bi - directional inference
|
||||
# NPl+ NSg/I/V/J P D NSg/I/J/R/Dq R VPPtPp/J N🅪Sg/Vg/J/C D NSg/J . NSg/J NSg+
|
||||
> HMMs underlie the functioning of stochastic taggers and are used in various
|
||||
# ? VB D Nᴹ/Vg/J+ P J NPl VB/C VB VP/J NPr/J/P J
|
||||
> algorithms one of the most widely used being the bi - directional inference
|
||||
# NPl+ NSg/I/VB/J P D NSg/I/J/R/Dq R VP/J N🅪Sg/Vg/J/C D NSg/J . NSg/J NSg+
|
||||
> algorithm .
|
||||
# NSg .
|
||||
>
|
||||
|
|
@ -312,129 +312,129 @@
|
|||
# NSg/J+ Nᴹ/Vg/J+ NPl/V3+
|
||||
>
|
||||
#
|
||||
> In 1987 , Steven DeRose and Kenneth W. Church independently developed dynamic
|
||||
# NPr/J/P # . NPr+ ? V/C NPr+ ? NPr🅪/V+ R VP/J NSg/J
|
||||
> programming algorithms to solve the same problem in vastly less time . Their
|
||||
# Nᴹ/Vg/J+ NPl+ P NSg/V D I/J NSg/J+ NPr/J/P R V/J/R/C/P N🅪Sg/V/J+ . D$+
|
||||
> methods were similar to the Viterbi algorithm known for some time in other
|
||||
# NPl/V3+ NSg/VPt NSg/J P D ? NSg VPp/J C/P I/J/R/Dq N🅪Sg/V/J+ NPr/J/P NSg/V/J
|
||||
> fields . DeRose used a table of pairs , while Church used a table of triples and a
|
||||
# NPrPl/V3+ . ? VPPtPp/J D/P NSg/V P NPl/V3+ . NSg/V/C/P NPr🅪/V+ VPPtPp/J D/P NSg/V P NPl/V3 V/C D/P
|
||||
> method of estimating the values for triples that were rare or nonexistent in the
|
||||
# NSg/V P Nᴹ/Vg/J D NPl/V3+ C/P NPl/V3 NSg/I/C/Ddem+ NSg/VPt NSg/V/J NPr/C NSg/J NPr/J/P D
|
||||
> Brown Corpus ( an actual measurement of triple probabilities would require a much
|
||||
# NPr🅪/V/J NSg+ . D/P NSg/J N🅪Sg P NSg/V/J NPl+ VX NSg/V D/P NSg/I/J/R/Dq
|
||||
> In 1987 , Steven DeRose and Kenneth W. Church independently developed dynamic
|
||||
# NPr/J/P # . NPr+ ? VB/C NPr+ ? NPr🅪/VB+ R VP/J NSg/J
|
||||
> programming algorithms to solve the same problem in vastly less time . Their
|
||||
# Nᴹ/Vg/J+ NPl+ P NSg/VB D I/J NSg/J+ NPr/J/P R VB/J/R/C/P N🅪Sg/VB/J+ . D$+
|
||||
> methods were similar to the Viterbi algorithm known for some time in other
|
||||
# NPl/V3+ NSg/VPt NSg/J P D ? NSg VPp/J C/P I/J/R/Dq N🅪Sg/VB/J+ NPr/J/P NSg/VB/J
|
||||
> fields . DeRose used a table of pairs , while Church used a table of triples and a
|
||||
# NPrPl/V3+ . ? VP/J D/P NSg/VB P NPl/V3+ . NSg/VB/C/P NPr🅪/VB+ VP/J D/P NSg/VB P NPl/V3 VB/C D/P
|
||||
> method of estimating the values for triples that were rare or nonexistent in the
|
||||
# NSg/VB P Nᴹ/Vg/J D NPl/V3+ C/P NPl/V3 NSg/I/C/Ddem+ NSg/VPt NSg/VB/J NPr/C NSg/J NPr/J/P D
|
||||
> Brown Corpus ( an actual measurement of triple probabilities would require a much
|
||||
# NPr🅪/VB/J NSg+ . D/P NSg/J N🅪Sg P NSg/VB/J NPl+ VXB NSg/VB D/P NSg/I/J/R/Dq
|
||||
> larger corpus ) . Both methods achieved an accuracy of over 95 % . DeRose's 1990
|
||||
# JC NSg+ . . I/C/Dq NPl/V3+ VP/J D/P N🅪Sg+ P NSg/J/P # . . ? #
|
||||
> dissertation at Brown University included analyses of the specific error types ,
|
||||
# NSg+ NSg/P NPr🅪/V/J NSg+ VP/J NPl/V3/Au/Br P D NSg/J NSg/V+ NPl/V3+ .
|
||||
> probabilities , and other related data , and replicated his work for Greek , where
|
||||
# NPl+ . V/C NSg/V/J J N🅪Pl+ . V/C VP/J ISg/D$+ N🅪Sg/V+ C/P NPr/V/J . NSg/C
|
||||
> dissertation at Brown University included analyses of the specific error types ,
|
||||
# NSg+ NSg/P NPr🅪/VB/J NSg+ VP/J NPl/V3/Au/Br P D NSg/J NSg/VB+ NPl/V3+ .
|
||||
> probabilities , and other related data , and replicated his work for Greek , where
|
||||
# NPl+ . VB/C NSg/VB/J J N🅪Pl+ . VB/C VP/J ISg/D$+ N🅪Sg/VB+ C/P NPr/VB/J . NSg/C
|
||||
> it proved similarly effective .
|
||||
# NPr/ISg+ VP/J R NSg/J .
|
||||
>
|
||||
#
|
||||
> These findings were surprisingly disruptive to the field of natural language
|
||||
# I/Ddem+ NSg+ NSg/VPt R J P D NSg/V P NSg/J+ N🅪Sg/V+
|
||||
> These findings were surprisingly disruptive to the field of natural language
|
||||
# I/Ddem+ NSg+ NSg/VPt R J P D NSg/VB P NSg/J+ N🅪Sg/VB+
|
||||
> processing . The accuracy reported was higher than the typical accuracy of very
|
||||
# Nᴹ/Vg/J+ . D+ N🅪Sg+ VP/J VPt NSg/JC C/P D NSg/J N🅪Sg P J/R
|
||||
> sophisticated algorithms that integrated part of speech choice with many higher
|
||||
# VP/J+ NPl+ NSg/I/C/Ddem+ VP/J NSg/V/J P N🅪Sg/V+ N🅪Sg/J+ P NSg/I/J/Dq NSg/JC
|
||||
> levels of linguistic analysis : syntax , morphology , semantics , and so on . CLAWS ,
|
||||
# NPl/V3 P J N🅪Sg . Nᴹ+ . Nᴹ+ . NPl+ . V/C NSg/I/J/C J/P . NPl/V3+ .
|
||||
> DeRose's and Church's methods did fail for some of the known cases where
|
||||
# ? V/C NSg$ NPl/V3+ VPt NSg/V/J C/P I/J/R/Dq P D VPp/J NPl/V3+ NSg/C
|
||||
> semantics is required , but those proved negligibly rare . This convinced many in
|
||||
# NPl+ VL3 VP/J . NSg/C/P I/Ddem VP/J R NSg/V/J . I/Ddem VP/J NSg/I/J/Dq NPr/J/P
|
||||
> the field that part - of - speech tagging could usefully be separated from the other
|
||||
# D+ NSg/V+ NSg/I/C/Ddem+ NSg/V/J+ . P . N🅪Sg/V+ NSg/V NSg/VX R NSg/VXB VP/J P D NSg/V/J
|
||||
> levels of processing ; this , in turn , simplified the theory and practice of
|
||||
# NPl/V3 P Nᴹ/Vg/J+ . I/Ddem+ . NPr/J/P NSg/V . VP/J D N🅪Sg V/C NSg/V P
|
||||
> computerized language analysis and encouraged researchers to find ways to
|
||||
# VP/J N🅪Sg/V+ N🅪Sg+ V/C VP/J NPl+ P NSg/V NPl+ P
|
||||
> separate other pieces as well . Markov Models became the standard method for the
|
||||
# NSg/V/J NSg/V/J NPl/V3+ NSg/R NSg/V/J . NPr NPl/V3+ VPt D NSg/J NSg/V+ C/P D
|
||||
> part - of - speech assignment .
|
||||
# NSg/V/J+ . P . N🅪Sg/V+ NSg+ .
|
||||
> sophisticated algorithms that integrated part of speech choice with many higher
|
||||
# VP/J+ NPl+ NSg/I/C/Ddem+ VP/J NSg/VB/J P N🅪Sg/VB+ N🅪Sg/J+ P NSg/I/J/Dq NSg/JC
|
||||
> levels of linguistic analysis : syntax , morphology , semantics , and so on . CLAWS ,
|
||||
# NPl/V3 P J N🅪Sg . Nᴹ+ . Nᴹ+ . NPl+ . VB/C NSg/I/J/C J/P . NPl/V3+ .
|
||||
> DeRose's and Church's methods did fail for some of the known cases where
|
||||
# ? VB/C NSg$ NPl/V3+ VPt NSg/VB/J C/P I/J/R/Dq P D VPp/J NPl/V3+ NSg/C
|
||||
> semantics is required , but those proved negligibly rare . This convinced many in
|
||||
# NPl+ VL3 VP/J . NSg/C/P I/Ddem VP/J R NSg/VB/J . I/Ddem VP/J NSg/I/J/Dq NPr/J/P
|
||||
> the field that part - of - speech tagging could usefully be separated from the other
|
||||
# D+ NSg/VB+ NSg/I/C/Ddem+ NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg NSg/VXB R NSg/VXB VP/J P D NSg/VB/J
|
||||
> levels of processing ; this , in turn , simplified the theory and practice of
|
||||
# NPl/V3 P Nᴹ/Vg/J+ . I/Ddem+ . NPr/J/P NSg/VB . VP/J D N🅪Sg VB/C NSg/VB P
|
||||
> computerized language analysis and encouraged researchers to find ways to
|
||||
# VP/J N🅪Sg/VB+ N🅪Sg+ VB/C VP/J NPl+ P NSg/VB NPl+ P
|
||||
> separate other pieces as well . Markov Models became the standard method for the
|
||||
# NSg/VB/J NSg/VB/J NPl/V3+ NSg/R NSg/VB/J . NPr NPl/V3+ VPt D NSg/J NSg/VB+ C/P D
|
||||
> part - of - speech assignment .
|
||||
# NSg/VB/J+ . P . N🅪Sg/VB+ NSg+ .
|
||||
>
|
||||
#
|
||||
> Unsupervised taggers
|
||||
# V/J NPl
|
||||
# VB/J NPl
|
||||
>
|
||||
#
|
||||
> The methods already discussed involve working from a pre - existing corpus to
|
||||
# D+ NPl/V3+ W? VP/J V Nᴹ/Vg/J P D/P+ NSg/V/P+ . Nᴹ/Vg/J NSg+ P
|
||||
> learn tag probabilities . It is , however , also possible to bootstrap using
|
||||
# NSg/V NSg/V+ NPl+ . NPr/ISg+ VL3 . C . R/C NSg/J P NSg/V Nᴹ/Vg/J
|
||||
> The methods already discussed involve working from a pre - existing corpus to
|
||||
# D+ NPl/V3+ W? VP/J VB Nᴹ/Vg/J P D/P+ NSg/VB/P+ . Nᴹ/Vg/J NSg+ P
|
||||
> learn tag probabilities . It is , however , also possible to bootstrap using
|
||||
# NSg/VB NSg/VB+ NPl+ . NPr/ISg+ VL3 . C . R/C NSg/J P NSg/VB Nᴹ/Vg/J
|
||||
> " unsupervised " tagging . Unsupervised tagging techniques use an untagged corpus
|
||||
# . V/J . NSg/V . V/J NSg/V NPl+ N🅪Sg/VB D/P J NSg+
|
||||
> for their training data and produce the tagset by induction . That is , they
|
||||
# C/P D$+ Nᴹ/Vg/J+ N🅪Pl+ V/C Nᴹ/V D NSg NSg/J/P N🅪Sg . NSg/I/C/Ddem+ VL3 . IPl+
|
||||
> observe patterns in word use , and derive part - of - speech categories themselves .
|
||||
# NSg/V NPl/V3+ NPr/J/P NSg/V+ N🅪Sg/VB . V/C NSg/V NSg/V/J+ . P . N🅪Sg/V+ NPl+ IPl+ .
|
||||
> For example , statistics readily reveal that " the " , " a " , and " an " occur in
|
||||
# C/P NSg/V+ . NPl/V3+ R NSg/V NSg/I/C/Ddem+ . D . . . D/P . . V/C . D/P . V NPr/J/P
|
||||
> similar contexts , while " eat " occurs in very different ones . With sufficient
|
||||
# NSg/J+ NPl/V3+ . NSg/V/C/P . V . V3 NPr/J/P J/R NSg/J+ NPl/V3+ . P J
|
||||
# . VB/J . NSg/Vg . VB/J NSg/Vg NPl+ N🅪Sg/VB D/P J NSg+
|
||||
> for their training data and produce the tagset by induction . That is , they
|
||||
# C/P D$+ Nᴹ/Vg/J+ N🅪Pl+ VB/C Nᴹ/VB D NSg NSg/J/P N🅪Sg . NSg/I/C/Ddem+ VL3 . IPl+
|
||||
> observe patterns in word use , and derive part - of - speech categories themselves .
|
||||
# NSg/VB NPl/V3+ NPr/J/P NSg/VB+ N🅪Sg/VB . VB/C NSg/VB NSg/VB/J+ . P . N🅪Sg/VB+ NPl+ IPl+ .
|
||||
> For example , statistics readily reveal that " the " , " a " , and " an " occur in
|
||||
# C/P NSg/VB+ . NPl/V3+ R NSg/VB NSg/I/C/Ddem+ . D . . . D/P . . VB/C . D/P . VB NPr/J/P
|
||||
> similar contexts , while " eat " occurs in very different ones . With sufficient
|
||||
# NSg/J+ NPl/V3+ . NSg/VB/C/P . VB . V3 NPr/J/P J/R NSg/J+ NPl/V3+ . P J
|
||||
> iteration , similarity classes of words emerge that are remarkably similar to
|
||||
# N🅪Sg . NSg NPl/V3 P NPl/V3+ NSg/V NSg/I/C/Ddem+ V R NSg/J P
|
||||
> those human linguists would expect ; and the differences themselves sometimes
|
||||
# I/Ddem NSg/V/J NPl+ VX V . V/C D NPl/V+ IPl+ R
|
||||
> suggest valuable new insights .
|
||||
# V NSg/J NSg/V/J NPl+ .
|
||||
# N🅪Sg . NSg NPl/V3 P NPl/V3+ NSg/VB NSg/I/C/Ddem+ VB R NSg/J P
|
||||
> those human linguists would expect ; and the differences themselves sometimes
|
||||
# I/Ddem NSg/VB/J NPl+ VXB VB . VB/C D NPl/VB+ IPl+ R
|
||||
> suggest valuable new insights .
|
||||
# VB NSg/J NSg/VB/J NPl+ .
|
||||
>
|
||||
#
|
||||
> These two categories can be further subdivided into rule - based , stochastic , and
|
||||
# I/Ddem+ NSg+ NPl+ NPr/VX NSg/VXB V/J VP/J P NSg/V+ . VP/J . J . V/C
|
||||
> These two categories can be further subdivided into rule - based , stochastic , and
|
||||
# I/Ddem+ NSg+ NPl+ NPr/VXB NSg/VXB VB/J VP/J P NSg/VB+ . VP/J . J . VB/C
|
||||
> neural approaches .
|
||||
# J NPl/V3+ .
|
||||
>
|
||||
#
|
||||
> Other taggers and methods
|
||||
# NSg/V/J NPl V/C NPl/V3+
|
||||
> Other taggers and methods
|
||||
# NSg/VB/J NPl VB/C NPl/V3+
|
||||
>
|
||||
#
|
||||
> Some current major algorithms for part - of - speech tagging include the Viterbi
|
||||
# I/J/R/Dq NSg/J NPr/V/J NPl C/P NSg/V/J+ . P . N🅪Sg/V+ NSg/V NSg/V D ?
|
||||
> algorithm , Brill tagger , Constraint Grammar , and the Baum - Welch algorithm ( also
|
||||
# NSg . NSg/J NSg . NSg+ N🅪Sg/V+ . V/C D NPr . ? NSg . R/C
|
||||
> known as the forward - backward algorithm ) . Hidden Markov model and visible Markov
|
||||
# VPp/J NSg/R D NSg/V/J . NSg/J NSg . . V/J NPr NSg/V/J+ V/C J NPr
|
||||
> model taggers can both be implemented using the Viterbi algorithm . The
|
||||
# NSg/V/J+ NPl NPr/VX I/C/Dq NSg/VXB VP/J Nᴹ/Vg/J D ? NSg . D+
|
||||
> rule - based Brill tagger is unusual in that it learns a set of rule patterns , and
|
||||
# NSg/V+ . VP/J NSg/J NSg VL3 NSg/J NPr/J/P NSg/I/C/Ddem NPr/ISg+ NPl/V3 D/P NPr/V/J P NSg/V+ NPl/V3+ . V/C
|
||||
> then applies those patterns rather than optimizing a statistical quantity .
|
||||
# NSg/J/C V3 I/Ddem NPl/V3+ NPr/V/J/R C/P Nᴹ/Vg/J D/P J N🅪Sg+ .
|
||||
> Some current major algorithms for part - of - speech tagging include the Viterbi
|
||||
# I/J/R/Dq NSg/J NPr/VB/J NPl C/P NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg NSg/VB D ?
|
||||
> algorithm , Brill tagger , Constraint Grammar , and the Baum - Welch algorithm ( also
|
||||
# NSg . NSg/J NSg . NSg+ N🅪Sg/VB+ . VB/C D NPr . ? NSg . R/C
|
||||
> known as the forward - backward algorithm ) . Hidden Markov model and visible Markov
|
||||
# VPp/J NSg/R D NSg/VB/J . NSg/J NSg . . VB/J NPr NSg/VB/J+ VB/C J NPr
|
||||
> model taggers can both be implemented using the Viterbi algorithm . The
|
||||
# NSg/VB/J+ NPl NPr/VXB I/C/Dq NSg/VXB VP/J Nᴹ/Vg/J D ? NSg . D+
|
||||
> rule - based Brill tagger is unusual in that it learns a set of rule patterns , and
|
||||
# NSg/VB+ . VP/J NSg/J NSg VL3 NSg/J NPr/J/P NSg/I/C/Ddem NPr/ISg+ NPl/V3 D/P NPr/VB/J P NSg/VB+ NPl/V3+ . VB/C
|
||||
> then applies those patterns rather than optimizing a statistical quantity .
|
||||
# NSg/J/C V3 I/Ddem NPl/V3+ NPr/VB/J/R C/P Nᴹ/Vg/J D/P J N🅪Sg+ .
|
||||
>
|
||||
#
|
||||
> Many machine learning methods have also been applied to the problem of POS
|
||||
# NSg/I/J/Dq+ NSg/V+ Nᴹ/Vg/J+ NPl/V3+ NSg/VX R/C NSg/VPp VP/J P D NSg/J P NSg+
|
||||
> Many machine learning methods have also been applied to the problem of POS
|
||||
# NSg/I/J/Dq+ NSg/VB+ Nᴹ/Vg/J+ NPl/V3+ NSg/VXB R/C NSg/VPp VP/J P D NSg/J P NSg+
|
||||
> tagging . Methods such as SVM , maximum entropy classifier , perceptron , and
|
||||
# NSg/V . NPl/V3+ NSg/I NSg/R ? . NSg/J NSg NSg . NSg . V/C
|
||||
> nearest - neighbor have all been tried , and most can achieve accuracy above
|
||||
# JS . NSg/V/J/Am+ NSg/VX NSg/I/J/C/Dq NSg/VPp VP/J . V/C NSg/I/J/R/Dq NPr/VX V N🅪Sg+ NSg/J/P
|
||||
# NSg/Vg . NPl/V3+ NSg/I NSg/R ? . NSg/J NSg NSg . NSg . VB/C
|
||||
> nearest - neighbor have all been tried , and most can achieve accuracy above
|
||||
# JS . NSg/VB/J/Am+ NSg/VXB NSg/I/J/C/Dq NSg/VPp VP/J . VB/C NSg/I/J/R/Dq NPr/VXB VB N🅪Sg+ NSg/J/P
|
||||
> 95 % . [ citation needed ]
|
||||
# # . . . NSg+ VP/J .
|
||||
>
|
||||
#
|
||||
> A direct comparison of several methods is reported ( with references ) at the ACL
|
||||
# D/P V/J NSg P J/Dq+ NPl/V3+ VL3 VP/J . P NPl/V3+ . NSg/P D NSg
|
||||
> Wiki . This comparison uses the Penn tag set on some of the Penn Treebank data ,
|
||||
# NSg/V+ . I/Ddem+ NSg+ NPl/V3 D+ NPr+ NSg/V+ NPr/V/J J/P I/J/R/Dq P D NPr+ ? N🅪Pl+ .
|
||||
# D/P VB/J NSg P J/Dq+ NPl/V3+ VL3 VP/J . P NPl/V3+ . NSg/P D NSg
|
||||
> Wiki . This comparison uses the Penn tag set on some of the Penn Treebank data ,
|
||||
# NSg/VB+ . I/Ddem+ NSg+ NPl/V3 D+ NPr+ NSg/VB+ NPr/VB/J J/P I/J/R/Dq P D NPr+ ? N🅪Pl+ .
|
||||
> so the results are directly comparable . However , many significant taggers are
|
||||
# NSg/I/J/C D NPl/V3+ V R/C NSg/J . C . NSg/I/J/Dq NSg/J NPl V
|
||||
> not included ( perhaps because of the labor involved in reconfiguring them for
|
||||
# NSg/C VP/J . NSg/R C/P P D NPr🅪/V/Am/Au+ VP/J NPr/J/P Nᴹ/Vg/J NSg/IPl+ C/P
|
||||
# NSg/I/J/C D NPl/V3+ VB R/C NSg/J . C . NSg/I/J/Dq NSg/J NPl VB
|
||||
> not included ( perhaps because of the labor involved in reconfiguring them for
|
||||
# NSg/C VP/J . NSg/R C/P P D NPr🅪/VB/Am/Au+ VP/J NPr/J/P Nᴹ/Vg/J NSg/IPl+ C/P
|
||||
> this particular dataset ) . Thus , it should not be assumed that the results
|
||||
# I/Ddem NSg/J NSg . . NSg . NPr/ISg+ VX NSg/C NSg/VXB VP/J NSg/I/C/Ddem D+ NPl/V3+
|
||||
> reported here are the best that can be achieved with a given approach ; nor even
|
||||
# VP/J NSg/J/R V D NPr/VX/JS NSg/I/C/Ddem+ NPr/VX NSg/VXB VP/J P D/P+ NSg/V/J/P+ NSg/V+ . NSg/C NSg/V/J
|
||||
> the best that have been achieved with a given approach .
|
||||
# D+ NPr/VX/JS+ NSg/I/C/Ddem+ NSg/VX NSg/VPp VP/J P D/P+ NSg/V/J/P+ NSg/V+ .
|
||||
# I/Ddem NSg/J NSg . . NSg . NPr/ISg+ VXB NSg/C NSg/VXB VP/J NSg/I/C/Ddem D+ NPl/V3+
|
||||
> reported here are the best that can be achieved with a given approach ; nor even
|
||||
# VP/J NSg/J/R VB D NPr/VXB/JS NSg/I/C/Ddem+ NPr/VXB NSg/VXB VP/J P D/P+ NSg/VB/J/P+ NSg/VB+ . NSg/C NSg/VB/J
|
||||
> the best that have been achieved with a given approach .
|
||||
# D+ NPr/VXB/JS+ NSg/I/C/Ddem+ NSg/VXB NSg/VPp VP/J P D/P+ NSg/VB/J/P+ NSg/VB+ .
|
||||
>
|
||||
#
|
||||
> In 2014 , a paper reporting using the structure regularization method for
|
||||
# NPr/J/P # . D/P+ N🅪Sg/V/J+ Nᴹ/Vg/J Nᴹ/Vg/J D N🅪Sg/V+ N🅪Sg NSg/V C/P
|
||||
> part - of - speech tagging , achieving 97.36 % on a standard benchmark dataset .
|
||||
# NSg/V/J+ . P . N🅪Sg/V+ NSg/V . Nᴹ/Vg/J # . J/P D/P NSg/J NSg/V NSg .
|
||||
> In 2014 , a paper reporting using the structure regularization method for
|
||||
# NPr/J/P # . D/P+ N🅪Sg/VB/J+ Nᴹ/Vg/J Nᴹ/Vg/J D N🅪Sg/VB+ N🅪Sg NSg/VB C/P
|
||||
> part - of - speech tagging , achieving 97.36 % on a standard benchmark dataset .
|
||||
# NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg . Nᴹ/Vg/J # . J/P D/P NSg/J NSg/VB NSg .
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
> Spell
|
||||
# NSg/V
|
||||
# NSg/VB
|
||||
>
|
||||
#
|
||||
> This document contains a list of words spelled correctly in some dialects of English , but not American English . This is designed to test the spelling suggestions we give for such mistakes .
|
||||
# I/Ddem+ NSg/V+ V3 D/P NSg/V P NPl/V3+ VP/J R NPr/J/P I/J/R/Dq NPl P NPr🅪/V/J+ . NSg/C/P NSg/C NPr/J NPr🅪/V/J+ . I/Ddem+ VL3 VP/J P NSg/V D+ Nᴹ/Vg/J+ NPl+ IPl+ NSg/V C/P NSg/I+ NPl/V3+ .
|
||||
> This document contains a list of words spelled correctly in some dialects of English , but not American English . This is designed to test the spelling suggestions we give for such mistakes .
|
||||
# I/Ddem+ NSg/VB+ V3 D/P NSg/VB P NPl/V3+ VP/J R NPr/J/P I/J/R/Dq NPl P NPr🅪/VB/J+ . NSg/C/P NSg/C NPr/J NPr🅪/VB/J+ . I/Ddem+ VL3 VP/J P NSg/VB D+ Nᴹ/Vg/J+ NPl+ IPl+ NSg/VB C/P NSg/I+ NPl/V3+ .
|
||||
>
|
||||
#
|
||||
> To achieve this , the filename of this file contains `.US , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect .
|
||||
# P V I/Ddem+ . D NSg P I/Ddem NSg/V+ V3 Unlintable . I/C+ NPr/VX NPr/VB D NSg/V+ NSg P N🅪Sg/VB D NPr/J NSg+ . NPr/V/J/R C/P Nᴹ/Vg/J P N🅪Sg/VB D/P W? VP/J NSg+ .
|
||||
> To achieve this , the filename of this file contains `.US , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect .
|
||||
# P VB I/Ddem+ . D NSg P I/Ddem NSg/VB+ V3 Unlintable . I/C+ NPr/VXB NPr/VB D NSg/VB+ NSg P N🅪Sg/VB D NPr/J NSg+ . NPr/VB/J/R C/P Nᴹ/Vg/J P N🅪Sg/VB D/P W? VP/J NSg+ .
|
||||
>
|
||||
#
|
||||
> Words
|
||||
|
|
@ -22,57 +22,57 @@
|
|||
# R/Comm .
|
||||
>
|
||||
#
|
||||
> Centre .
|
||||
# NSg/V/Comm+ .
|
||||
> Centre .
|
||||
# NSg/VB/Comm+ .
|
||||
>
|
||||
#
|
||||
> Labelled .
|
||||
# V/J/Comm .
|
||||
> Labelled .
|
||||
# VB/J/Comm .
|
||||
>
|
||||
#
|
||||
> Flavour .
|
||||
# N🅪Sg/V/Comm+ .
|
||||
> Flavour .
|
||||
# N🅪Sg/VB/Comm+ .
|
||||
>
|
||||
#
|
||||
> Favoured .
|
||||
# VP/J/Comm .
|
||||
>
|
||||
#
|
||||
> Honour .
|
||||
# N🅪Sg/V/Comm+ .
|
||||
> Honour .
|
||||
# N🅪Sg/VB/Comm+ .
|
||||
>
|
||||
#
|
||||
> Grey .
|
||||
# NPr🅪/V/J/Comm .
|
||||
> Grey .
|
||||
# NPr🅪/VB/J/Comm .
|
||||
>
|
||||
#
|
||||
> Quarrelled .
|
||||
# V/Comm .
|
||||
# VB/Comm .
|
||||
>
|
||||
#
|
||||
> Quarrelling .
|
||||
# Nᴹ/V/Comm .
|
||||
# Nᴹ/VB/Comm .
|
||||
>
|
||||
#
|
||||
> Recognised .
|
||||
# VP/J/Au/Br .
|
||||
>
|
||||
#
|
||||
> Neighbour .
|
||||
# NSg/V/J/Comm+ .
|
||||
> Neighbour .
|
||||
# NSg/VB/J/Comm+ .
|
||||
>
|
||||
#
|
||||
> Neighbouring .
|
||||
# Nᴹ/Vg/J/Comm .
|
||||
>
|
||||
#
|
||||
> Clamour .
|
||||
# NSg/V/Comm .
|
||||
> Clamour .
|
||||
# NSg/VB/Comm .
|
||||
>
|
||||
#
|
||||
> Theatre .
|
||||
# N🅪Sg/Comm+ .
|
||||
>
|
||||
#
|
||||
> Analyse .
|
||||
# V/Au/Br .
|
||||
> Analyse .
|
||||
# VB/Au/Br .
|
||||
|
|
|
|||
|
|
@ -1,26 +1,26 @@
|
|||
> Spell
|
||||
# NSg/V
|
||||
# NSg/VB
|
||||
>
|
||||
#
|
||||
> This document contains example sentences with misspelled words that we want to test the spell checker on .
|
||||
# I/Ddem+ NSg/V+ V3 NSg/V+ NPl/V3+ P VP/J NPl/V3+ NSg/I/C/Ddem+ IPl+ NSg/V P NSg/V D NSg/V NSg/V J/P .
|
||||
> This document contains example sentences with misspelled words that we want to test the spell checker on .
|
||||
# I/Ddem+ NSg/VB+ V3 NSg/VB+ NPl/V3+ P VP/J NPl/V3+ NSg/I/C/Ddem+ IPl+ NSg/VB P NSg/VB D NSg/VB NSg/VB J/P .
|
||||
>
|
||||
#
|
||||
> Example Sentences
|
||||
# NSg/V+ NPl/V3+
|
||||
# NSg/VB+ NPl/V3+
|
||||
>
|
||||
#
|
||||
> My favourite color is blu .
|
||||
# D$+ NSg/V/J/Comm+ N🅪Sg/V/J/Am+ VL3 W? .
|
||||
> I must defend my honour !
|
||||
# ISg+ NSg/V NSg/V D$+ N🅪Sg/V/Comm+ .
|
||||
> My favourite color is blu .
|
||||
# D$+ NSg/VB/J/Comm+ N🅪Sg/VB/J/Am+ VL3 W? .
|
||||
> I must defend my honour !
|
||||
# ISg+ NSg/VB NSg/VB D$+ N🅪Sg/VB/Comm+ .
|
||||
> I recognize that you recognise me .
|
||||
# ISg+ V NSg/I/C/Ddem ISgPl+ V/Au/Br NPr/ISg+ .
|
||||
# ISg+ VB NSg/I/C/Ddem ISgPl+ VB/Au/Br NPr/ISg+ .
|
||||
> I analyze how you infantilize me .
|
||||
# ISg+ V NSg/C ISgPl+ V NPr/ISg+ .
|
||||
> I analyse how you infantilise me .
|
||||
# ISg+ V/Au/Br NSg/C ISgPl+ ? NPr/ISg+ .
|
||||
# ISg+ VB NSg/C ISgPl+ VB NPr/ISg+ .
|
||||
> I analyse how you infantilise me .
|
||||
# ISg+ VB/Au/Br NSg/C ISgPl+ ? NPr/ISg+ .
|
||||
> Careful , traveller !
|
||||
# J . NSg/Comm+ .
|
||||
> At the centre of the theatre I dropped a litre of coke .
|
||||
# NSg/P D NSg/V/Comm P D+ N🅪Sg/Comm+ ISg+ V/J D/P NSg/Comm P NPr🅪/V+ .
|
||||
> At the centre of the theatre I dropped a litre of coke .
|
||||
# NSg/P D NSg/VB/Comm P D+ N🅪Sg/Comm+ ISg+ VP/J D/P NSg/Comm P NPr🅪/VB+ .
|
||||
|
|
|
|||
|
|
@ -3,16 +3,16 @@
|
|||
>
|
||||
#
|
||||
> This documents tests that different forms / variations of swears are tagged as such .
|
||||
# I/Ddem+ NPl/V3+ NPl/V3+ NSg/I/C/Ddem NSg/J+ NPl/V3+ . W? P NPl/V3 V V/J NSg/R NSg/I .
|
||||
# I/Ddem+ NPl/V3+ NPl/V3+ NSg/I/C/Ddem NSg/J+ NPl/V3+ . W? P NPl/V3 VB VP/J NSg/R NSg/I .
|
||||
>
|
||||
#
|
||||
> Examples
|
||||
# NPl/V3+
|
||||
>
|
||||
#
|
||||
> One turd , two turds .
|
||||
# NSg/I/V/J+ NSg/V+/B . NSg NPl/V3/B .
|
||||
> One turd , two turds .
|
||||
# NSg/I/VB/J+ NSg/VB+/B . NSg NPl/V3/B .
|
||||
>
|
||||
#
|
||||
> I fart , you're farting , he farts , she farted .
|
||||
# ISg+ NSg/V/B . + Nᴹ/Vg/J/B . NPr/ISg+ NPl/V3/B . ISg+ VP/J/B .
|
||||
> I fart , you're farting , he farts , she farted .
|
||||
# ISg+ NSg/VB/B . + Nᴹ/Vg/J/B . NPr/ISg+ NPl/V3/B . ISg+ VP/J/B .
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,74 +1,74 @@
|
|||
> " This " and " that " are common and fulfill multiple purposes in everyday English .
|
||||
# . I/Ddem+ . V/C . NSg/I/C/Ddem+ . V NSg/V/J V/C V/NoAm NSg/J/Dq NPl/V3 NPr/J/P NSg/J+ NPr🅪/V/J+ .
|
||||
> " This " and " that " are common and fulfill multiple purposes in everyday English .
|
||||
# . I/Ddem+ . VB/C . NSg/I/C/Ddem+ . VB NSg/VB/J VB/C VB/NoAm NSg/J/Dq NPl/V3 NPr/J/P NSg/J+ NPr🅪/VB/J+ .
|
||||
> As such , disambiguating them is necessary .
|
||||
# NSg/R NSg/I . Nᴹ/Vg/J NSg/IPl+ VL3 NSg/J .
|
||||
>
|
||||
#
|
||||
> This document contains various sentences that use " this " , " that " , " these " , and
|
||||
# I/Ddem+ NSg/V+ V3 J+ NPl/V3+ NSg/I/C/Ddem+ N🅪Sg/VB . I/Ddem+ . . . NSg/I/C/Ddem+ . . . I/Ddem . . V/C
|
||||
> " those " in different contexts with a lot of edge cases .
|
||||
# . I/Ddem . NPr/J/P NSg/J NPl/V3 P D/P NPr/V P NSg/V+ NPl/V3+ .
|
||||
# I/Ddem+ NSg/VB+ V3 J+ NPl/V3+ NSg/I/C/Ddem+ N🅪Sg/VB . I/Ddem+ . . . NSg/I/C/Ddem+ . . . I/Ddem . . VB/C
|
||||
> " those " in different contexts with a lot of edge cases .
|
||||
# . I/Ddem . NPr/J/P NSg/J NPl/V3 P D/P NPr/VB P NSg/VB+ NPl/V3+ .
|
||||
>
|
||||
#
|
||||
> Examples
|
||||
# NPl/V3+
|
||||
>
|
||||
#
|
||||
> This triangle is nice .
|
||||
# I/Ddem NSg VL3 NPr/V/J .
|
||||
> This is nice .
|
||||
# I/Ddem+ VL3 NPr/V/J .
|
||||
> That triangle is nice .
|
||||
# NSg/I/C/Ddem+ NSg VL3 NPr/V/J .
|
||||
> That is nice .
|
||||
# NSg/I/C/Ddem+ VL3 NPr/V/J .
|
||||
> These triangles are nice .
|
||||
# I/Ddem NPl V NPr/V/J .
|
||||
> These are nice .
|
||||
# I/Ddem+ V NPr/V/J .
|
||||
> Those triangles are nice .
|
||||
# I/Ddem NPl V NPr/V/J .
|
||||
> Those are nice .
|
||||
# I/Ddem+ V NPr/V/J .
|
||||
> This triangle is nice .
|
||||
# I/Ddem NSg VL3 NPr/VB/J .
|
||||
> This is nice .
|
||||
# I/Ddem+ VL3 NPr/VB/J .
|
||||
> That triangle is nice .
|
||||
# NSg/I/C/Ddem+ NSg VL3 NPr/VB/J .
|
||||
> That is nice .
|
||||
# NSg/I/C/Ddem+ VL3 NPr/VB/J .
|
||||
> These triangles are nice .
|
||||
# I/Ddem NPl VB NPr/VB/J .
|
||||
> These are nice .
|
||||
# I/Ddem+ VB NPr/VB/J .
|
||||
> Those triangles are nice .
|
||||
# I/Ddem NPl VB NPr/VB/J .
|
||||
> Those are nice .
|
||||
# I/Ddem+ VB NPr/VB/J .
|
||||
>
|
||||
#
|
||||
> This massage is nice .
|
||||
# I/Ddem+ NSg/V+ VL3 NPr/V/J .
|
||||
> That massage is nice .
|
||||
# NSg/I/C/Ddem NSg/V+ VL3 NPr/V/J .
|
||||
> These massages are nice .
|
||||
# I/Ddem+ NPl/V3+ V NPr/V/J .
|
||||
> Those massages are nice .
|
||||
# I/Ddem+ NPl/V3+ V NPr/V/J .
|
||||
> This massages well .
|
||||
# I/Ddem+ NPl/V3+ NSg/V/J .
|
||||
> That massages well .
|
||||
# NSg/I/C/Ddem+ NPl/V3+ NSg/V/J .
|
||||
> These massage well .
|
||||
# I/Ddem+ NSg/V+ NSg/V/J .
|
||||
> Those massage well .
|
||||
# I/Ddem+ NSg/V+ NSg/V/J .
|
||||
> This massage is nice .
|
||||
# I/Ddem+ NSg/VB+ VL3 NPr/VB/J .
|
||||
> That massage is nice .
|
||||
# NSg/I/C/Ddem NSg/VB+ VL3 NPr/VB/J .
|
||||
> These massages are nice .
|
||||
# I/Ddem+ NPl/V3+ VB NPr/VB/J .
|
||||
> Those massages are nice .
|
||||
# I/Ddem+ NPl/V3+ VB NPr/VB/J .
|
||||
> This massages well .
|
||||
# I/Ddem+ NPl/V3+ NSg/VB/J .
|
||||
> That massages well .
|
||||
# NSg/I/C/Ddem+ NPl/V3+ NSg/VB/J .
|
||||
> These massage well .
|
||||
# I/Ddem+ NSg/VB+ NSg/VB/J .
|
||||
> Those massage well .
|
||||
# I/Ddem+ NSg/VB+ NSg/VB/J .
|
||||
>
|
||||
#
|
||||
> That could be a solution .
|
||||
# NSg/I/C/Ddem+ NSg/VX NSg/VXB D/P+ NSg+ .
|
||||
> Find all candidates that could be a solution .
|
||||
# NSg/V NSg/I/J/C/Dq+ NPl/V3+ NSg/I/C/Ddem+ NSg/VX NSg/VXB D/P+ NSg+ .
|
||||
> That could be a solution .
|
||||
# NSg/I/C/Ddem+ NSg/VXB NSg/VXB D/P+ NSg+ .
|
||||
> Find all candidates that could be a solution .
|
||||
# NSg/VB NSg/I/J/C/Dq+ NPl/V3+ NSg/I/C/Ddem+ NSg/VXB NSg/VXB D/P+ NSg+ .
|
||||
>
|
||||
#
|
||||
> This is all that I have .
|
||||
# I/Ddem+ VL3 NSg/I/J/C/Dq NSg/I/C/Ddem ISg+ NSg/VX .
|
||||
> This is all that solutions can do .
|
||||
# I/Ddem+ VL3 NSg/I/J/C/Dq NSg/I/C/Ddem NPl+ NPr/VX NSg/VX .
|
||||
> That solution can do .
|
||||
# NSg/I/C/Ddem NSg+ NPr/VX NSg/VX .
|
||||
> This is all that I have .
|
||||
# I/Ddem+ VL3 NSg/I/J/C/Dq NSg/I/C/Ddem ISg+ NSg/VXB .
|
||||
> This is all that solutions can do .
|
||||
# I/Ddem+ VL3 NSg/I/J/C/Dq NSg/I/C/Ddem NPl+ NPr/VXB NSg/VXB .
|
||||
> That solution can do .
|
||||
# NSg/I/C/Ddem NSg+ NPr/VXB NSg/VXB .
|
||||
>
|
||||
#
|
||||
> We can do this !
|
||||
# IPl+ NPr/VX NSg/VX I/Ddem+ .
|
||||
> I can do this and that .
|
||||
# ISg+ NPr/VX NSg/VX I/Ddem V/C NSg/I/C/Ddem+ .
|
||||
> We can do this !
|
||||
# IPl+ NPr/VXB NSg/VXB I/Ddem+ .
|
||||
> I can do this and that .
|
||||
# ISg+ NPr/VXB NSg/VXB I/Ddem VB/C NSg/I/C/Ddem+ .
|
||||
>
|
||||
#
|
||||
> We unite to stand united in unity .
|
||||
# IPl+ NSg/V P NSg/V VP/J NPr/J/P NSg+ .
|
||||
> We unite to stand united in unity .
|
||||
# IPl+ NSg/VB P NSg/VB VP/J NPr/J/P Nᴹ+ .
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue