Here is a sample of Yiddish vocabulary, with the stemmed forms that will be generated by this algorithm:
word | ⇒ | stem |
אַװעקבלאָנדזשען אַװעקבלאָנדזשענדיק אַװעקבלאָנדזשענדיקן אַװעקבלאָנדזשענדיקס אַװעקבלאָנדזשענדיקע אַװעקבלאָנדזשענדיקער אַװעקגײן אַװעקגײנדיק אַװעקגײנדיקן אַװעקגײנדיקס אַװעקגײנדיקע אַװעקגײנדיקער אַװעקגנבֿענען אַװעקגנבֿענענדיק אַװעקגנבֿענענדיקן אַװעקגנבֿענענדיקס אַװעקגנבֿענענדיקע אַװעקגנבֿענענדיקער אַװעקגעבלאָנדזשעט אַװעקגעבלאָנדזשעטן אַװעקגעבלאָנדזשעטס אַװעקגעבלאָנדזשעטע אַװעקגעבלאָנדזשעטער אַװעקגעבן אַװעקגעבנדיק אַװעקגעבנדיקן אַװעקגעבנדיקס אַװעקגעבנדיקע אַװעקגעבנדיקער אַװעקגעגאַנגען אַװעקגעגאַנגענס אַװעקגעגאַנגענע אַװעקגעגאַנגענעם אַװעקגעגאַנגענער אַװעקגעגנבֿעט |
⇒ |
אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקגײ אװעקגײ אװעקגײ אװעקגײ אװעקגײ אװעקגײ אװעקגנבענ אװעקגנבענ אװעקגנבענ אװעקגנבענ אװעקגנבענ אװעקגנבענ אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקבלאנדזש אװעקגעב אװעקגעב אװעקגעב אװעקגעב אװעקגעב אװעקגעב אװעקגײ אװעקגײ אװעקגײ אװעקגײ אװעקגײ אװעקגנב |
אַבֿידות אַבסטראַקטסטער אַדורכבײַסנדיקער אַדורכגעביסן אַדורכגעשמועסט אַדורכפֿירנדיק אַװעקגעגאַנגען אַװעקגעגאַנגענעם אַװעקגענומענער אמתדיק אמתדיקן אמתדיקע אמתדיקער באַהאַלטן ביכער געאַכלט געאײַלט געאײַלן געבאָדענעם געבאָטענעם געשדכנטע עראָפּלאַנען פֿאַרגאַנגענהײט פֿאָרױסגעגאַנגענע קינדהײט װילן װילסט |
⇒ |
אבידה אבסטראקט אדורכבײס אדורכבײס אדורכשמוע אדורכפיר אװעקגײ אװעקגײ אװעקנעמ אמת אמת אמת אמת באהאל ביכ אכל אײל אײל באד באט שדכנ עראפלאנ פארגאנגענ פארױסגײ קינד װיל װיל |
We set up the following groupings:
Only a single marker is used: P1. To begin with, this is set at the end of the word.
We are now at the start of the main portion of the word (past any verbal prefix and past participle marker).
Unless otherwise stated, all deletes ensure we are beyond P1.
In each pass, at the first level of bullets, the longest matching suffix always wins.
First pass:
Second pass - after the first pass, do the following to the remaining stem:
Third pass - after the second pass, do the following to the remaining stem:
/* *******************************************
* Stemmer for Yiddish language in YIVO script
*
* Author: Assaf Urieli
* Emails: assaf.urieli at gmail.com
* Version: 0.1 (15.05.2020)
*
********************************************* */
// Routines defined in this file. prelude and mark_regions are defined in
// forward mode; R1, R1plus3 and standard_suffix are defined inside the
// backwardmode section.
routines (
prelude
mark_regions
R1
R1plus3
standard_suffix
)
// stem is the only routine visible to callers of the generated stemmer.
externals ( stem )
// p1: start of the region in which suffixes may be removed (set by mark_regions).
// x:  scratch mark set three characters past the prefix; used as a lower bound for p1.
integers ( p1 x )
// Character classes; their contents are defined after the stringdefs.
groupings ( vowel niked alefBeys consonant )
// Inside string literals, {Name} is replaced by the stringdef called Name.
stringescapes {}
// AlefBeys
// One stringdef per letter, so that literals elsewhere can be written as {Name}.
stringdef Alef '{U+05D0}'
stringdef Beys '{U+05D1}'
stringdef Giml '{U+05D2}'
stringdef Dalet '{U+05D3}'
stringdef Hey '{U+05D4}'
stringdef Vov '{U+05D5}'
stringdef Zayen '{U+05D6}'
stringdef Khes '{U+05D7}'
stringdef Tes '{U+05D8}'
stringdef Yud '{U+05D9}'
// LangerKhof, ShlosMem, LangerNun, LangerFey and LangerTsadek are rewritten
// by prelude to Khof, Mem, Nun, Fey and Tsadek respectively.
stringdef LangerKhof '{U+05DA}'
stringdef Khof '{U+05DB}'
stringdef Lamed '{U+05DC}'
stringdef ShlosMem '{U+05DD}'
stringdef Mem '{U+05DE}'
stringdef LangerNun '{U+05DF}'
stringdef Nun '{U+05E0}'
stringdef Samekh '{U+05E1}'
stringdef Ayen '{U+05E2}'
stringdef LangerFey '{U+05E3}'
stringdef Fey '{U+05E4}'
stringdef LangerTsadek '{U+05E5}'
stringdef Tsadek '{U+05E6}'
stringdef Kuf '{U+05E7}'
stringdef Reysh '{U+05E8}'
stringdef Shin '{U+05E9}'
stringdef Sof '{U+05EA}'
// Single-code-point digraphs. prelude also produces these from the
// two-letter sequences Vov Vov, Vov Yud and Yud Yud.
stringdef TsveyVovn '{U+05F0}'
stringdef VovYud '{U+05F1}'
stringdef TsveyYudn '{U+05F2}'
// Niked
// Marks collected into the niked grouping, which prelude deletes from the word.
stringdef Shvo '{U+05B0}'
stringdef Khirik '{U+05B4}'
stringdef Tseyre '{U+05B5}'
stringdef Segl '{U+05B6}'
stringdef ReducedSegl '{U+05B1}'
stringdef Pasekh '{U+05B7}'
stringdef ReducedPasekh '{U+05B2}'
stringdef Komets '{U+05B8}'
stringdef ReducedKomets '{U+05B3}'
stringdef Rafe '{U+05BF}'
stringdef SinDot '{U+05C2}'
stringdef ShinDot '{U+05C1}'
stringdef Khoylm '{U+05B9}'
stringdef Melupm '{U+05BC}'
stringdef Kubuts '{U+05BB}'
// Groupings
// niked: every mark defined in the Niked section; prelude strips all of these.
define niked '{Shvo}{Khirik}{Tseyre}{Segl}{ReducedSegl}{Pasekh}{ReducedPasekh}{Komets}{ReducedKomets}{SinDot}{ShinDot}{Khoylm}{Melupm}{Kubuts}{Rafe}'
// alefBeys: every letter defined in the AlefBeys section, including the
// word-final forms and the three digraphs.
define alefBeys '{Alef}{Beys}{Giml}{Dalet}{Hey}{Vov}{Zayen}{Khes}{Tes}{Yud}{LangerKhof}{Khof}{Lamed}{ShlosMem}{Mem}{LangerNun}{Nun}{Samekh}{Ayen}{LangerFey}{Fey}{LangerTsadek}{Tsadek}{Kuf}{Reysh}{Shin}{Sof}{TsveyVovn}{VovYud}{TsveyYudn}'
// vowel: the letters that mark_regions treats as vowels when locating P1.
define vowel '{Alef}{Vov}{Yud}{Ayen}{VovYud}{TsveyYudn}'
// consonant: every member of alefBeys that is not in vowel.
define consonant alefBeys - vowel
// Normalise the spelling of the word before any stemming takes place:
//   1. word-final letter forms are replaced by the regular forms;
//   2. the two-letter sequences Vov Vov, Vov Yud and Yud Yud are replaced by
//      the corresponding single-code-point digraphs, unless the character
//      that follows the pair is the mark tested for below;
//   3. every niked mark is removed.
define prelude as (
    // Pass 1: visit every position in the word and rewrite where an entry matches.
    do repeat goto (
        [substring] among (
            // Word-final forms -> regular forms
            '{LangerKhof}'   ( <- '{Khof}' )
            '{ShlosMem}'     ( <- '{Mem}' )
            '{LangerNun}'    ( <- '{Nun}' )
            '{LangerFey}'    ( <- '{Fey}' )
            '{LangerTsadek}' ( <- '{Tsadek}' )

            // Letter pairs -> digraphs. The pair is left alone when the next
            // character is the mark named in the `not` test.
            '{Yud}{Yud}' ( not '{Khirik}' <- '{TsveyYudn}' )
            '{Vov}{Yud}' ( not '{Khirik}' <- '{VovYud}' )
            '{Vov}{Vov}' ( not '{Melupm}' <- '{TsveyVovn}' )
        )
    )

    // Pass 2: delete every character that belongs to the niked grouping.
    do repeat goto ( [niked] delete )
)
// Locate P1, the position from which suffix removal is allowed, and tag the
// ge- and tsu- prefixes with the ASCII placeholders 'GE' and 'TSU' (these
// placeholders are deleted again at the end of standard_suffix).
// Runs forwards over the output of prelude.
define mark_regions as (
// Default: P1 at the end of the word, i.e. nothing is inside R1.
$p1 = limit
(
try (
// Replace past participle ge- at start of word
// Unless word starts with gelt- or gebn-
['{Giml}{Ayen}']
not ('{Lamed}{Tes}' or '{Beys}{Nun}') <- 'GE'
)
try (
// skip verbal prefix
among(
// Free stressed: Adurkh-, Durkh-, Ahin-, Aher-, Avek-, Mit-, Antkegn-, Akegn-, Anider-, Arop-, Aroys-, Aroyf-, Arum-, Arayn-, Arunter-, Ariber-, Nokh-, Farbay-, Aheym-, Afir-, Faroys-, Funander-, Tsuzamen-, Tsunoyf-, Tsurik-
'{Alef}{Dalet}{Vov}{Reysh}{Khof}' '{Dalet}{Vov}{Reysh}{Khof}' '{Alef}{Hey}{Yud}{Nun}' '{Alef}{Hey}{Ayen}{Reysh}' '{Alef}{TsveyVovn}{Ayen}{Kuf}' '{Mem}{Yud}{Tes}' '{Alef}{Nun}{Tes}{Kuf}{Ayen}{Giml}{Nun}' '{Alef}{Kuf}{Ayen}{Giml}{Nun}' '{Alef}{Nun}{Yud}{Dalet}{Ayen}{Reysh}' '{Alef}{Reysh}{Alef}{Fey}' '{Alef}{Reysh}{VovYud}{Samekh}' '{Alef}{Reysh}{VovYud}{Fey}' '{Alef}{Reysh}{Vov}{Mem}' '{Alef}{Reysh}{TsveyYudn}{Nun}' '{Alef}{Reysh}{Vov}{Nun}{Tes}{Ayen}{Reysh}' '{Alef}{Reysh}{Yud}{Beys}{Ayen}{Reysh}' '{Nun}{Alef}{Khof}' '{Fey}{Alef}{Reysh}{Beys}{TsveyYudn}' '{Alef}{Hey}{TsveyYudn}{Mem}' '{Alef}{Fey}{Yud}{Reysh}' '{Fey}{Alef}{Reysh}{VovYud}{Samekh}' '{Fey}{Vov}{Nun}{Alef}{Nun}{Dalet}{Ayen}{Reysh}' '{Tsadek}{Vov}{Zayen}{Alef}{Mem}{Ayen}{Nun}' '{Tsadek}{Vov}{Nun}{VovYud}{Fey}' '{Tsadek}{Vov}{Reysh}{Yud}{Kuf}'
// Stressed: Oys-, Oyf-, Um-, Unter-, Iber-, Ayn-, On-, Op-, Bay-, For-, Tsu-.
'{Alef}{VovYud}{Samekh}' '{Alef}{VovYud}{Fey}' '{Alef}{Vov}{Mem}' '{Alef}{Vov}{Nun}{Tes}{Ayen}{Reysh}' '{Alef}{Yud}{Beys}{Ayen}{Reysh}' '{Alef}{TsveyYudn}{Nun}' '{Alef}{Nun}' '{Alef}{Fey}' '{Beys}{TsveyYudn}' '{Fey}{Alef}{Reysh}' '{Tsadek}{Vov}'
// Unstressed: Ant-, Ba-, Der-, Tse-. Far- already covered by For-. Ge- comes later.
'{Alef}{Nun}{Tes}' '{Beys}{Alef}' '{Dalet}{Ayen}{Reysh}' '{Tsadek}{Ayen}'
// If verbal prefix followed by Tsu- or Ge-, replace it
// The alternatives are tried in order; the first one that succeeds wins.
// The two `test` alternatives succeed without changing anything, which is
// how the exceptions are protected from the replacements that follow them.
(
// Don't mark the TSU- prefix inside verbs like "oys-tsugn"
test (('{Tsadek}{Vov}{Giml}{Nun}' or '{Tsadek}{Vov}{Kuf}{Tes}' or '{Tsadek}{Vov}{Kuf}{Nun}') atlimit)
or
// Don't mark the GE- prefix inside verbs like "avek-gebn"
test ('{Giml}{Ayen}{Beys}{Nun}')
or
( ['{Giml}{Ayen}'] <- 'GE')
or
(['{Tsadek}{Vov}'] <- 'TSU')
)
)
)
// x = the position three characters further on; the cursor itself is not moved.
// If fewer than three characters remain, hop fails, the rest of the routine
// is skipped and p1 keeps its default value.
test(hop 3 setmark x)
// We want to allow three-consonant Hebrew roots.
// To this end, we skip three-consonant combinations that exist in non-Hebraic Yiddish.
try (
among(
'{Shin}{Fey}{Reysh}' '{Shin}{Tes}{Reysh}' '{Shin}{Tes}{Shin}' '{Dalet}{Zayen}{Shin}'
( true )
)
)
// Either 3 consonants or the first non-vowel after a vowel
// NOTE(review): when three consonants do follow, p1 is set inside the `not`,
// the `not` then signals failure, and the routine ends at that point with p1
// already assigned (the minimum-length adjustment below is not run).
// Otherwise p1 is set after the first run of vowels.
(
not (consonant consonant consonant setmark p1)
goto vowel repeat vowel setmark p1
)
try($p1 < x $p1 = x) // at least 3 past the prefix
)
)
backwardmode (
// R1: succeeds when the cursor is at or beyond P1.
define R1 as ( $p1 <= cursor )

// R1plus3: same test as R1, except that the cursor may fall short of P1 by
// up to the length of the string Giml Yud Samekh.
define R1plus3 as ( $p1 <= cursor + sizeof '{Giml}{Yud}{Samekh}' )
// Strip suffixes from the end of the word in three passes, then remove the
// 'GE' and 'TSU' placeholders that mark_regions inserted.
//
// Each pass is wrapped in `do`, so a pass that finds nothing (or whose action
// signals failure part-way through) never prevents the following passes from
// running. Within a pass the longest matching entry of the among wins.
// Unless an action says otherwise, a suffix is only removed when it starts
// at or after P1 (the R1 test).
define standard_suffix as (

    // ---------------------------------------------------------------
    // First pass: inflectional endings, irregular past participles and
    // the longer derivational endings.
    // ---------------------------------------------------------------
    do (
        [substring] among(
            // Plural/adjective endings: -ers, -en, -ns, -eners, -es, -e, -n, -s, -em, -er
            '{Ayen}{Reysh}{Samekh}' '{Ayen}{Nun}' '{Nun}{Samekh}' '{Ayen}{Nun}{Ayen}{Reysh}{Samekh}' '{Ayen}{Samekh}' '{Ayen}' '{Nun}' '{Samekh}' '{Ayen}{Mem}' '{Ayen}{Reysh}'
                ( R1 delete )

            // Exception: don't delete noun endings -ie, like "agitatsie"
            '{Yud}{Ayen}'
                ( true )

            // -ies => ie
            '{Yud}{Ayen}{Samekh}'
                ( R1 <- '{Yud}{Ayen}' )

            // Plural/adjective endings: -ene, -enem, -ener, -ens
            // After removing the ending, an irregular participle stem that is
            // now at the end of the word is mapped to its present-tense stem.
            '{Ayen}{Nun}{Ayen}' '{Ayen}{Nun}{Ayen}{Mem}' '{Ayen}{Nun}{Ayen}{Reysh}' '{Ayen}{Nun}{Samekh}'
                ( R1 delete
                  [substring] among (
                    // -gegangen => -gey
                    '{Giml}{Alef}{Nun}{Giml}' (<- '{Giml}{TsveyYudn}')
                    // -genumen => -nem
                    '{Nun}{Vov}{Mem}' (<- '{Nun}{Ayen}{Mem}')
                    // -gemiten => -mayd
                    '{Mem}{Yud}{Tes}' (<- '{Mem}{TsveyYudn}{Dalet}')
                    // -gebiten => -bayt
                    '{Beys}{Yud}{Tes}' (<- '{Beys}{TsveyYudn}{Tes}')
                    // -gebisen => -bays
                    '{Beys}{Yud}{Samekh}' (<- '{Beys}{TsveyYudn}{Samekh}')
                    // -gevizen => -vayz
                    '{TsveyVovn}{Yud}{Zayen}' (<- '{TsveyVovn}{TsveyYudn}{Zayen}')
                    // -getriben => -trayb
                    '{Tes}{Reysh}{Yud}{Beys}' (<- '{Tes}{Reysh}{TsveyYudn}{Beys}')
                    // -geliten => -layt
                    '{Lamed}{Yud}{Tes}' (<- '{Lamed}{TsveyYudn}{Tes}')
                    // -gekliben => -klayb
                    '{Kuf}{Lamed}{Yud}{Beys}' (<- '{Kuf}{Lamed}{TsveyYudn}{Beys}')
                    // -geriben => -rayb
                    '{Reysh}{Yud}{Beys}' (<- '{Reysh}{TsveyYudn}{Beys}')
                    // -gerisen => -rays
                    '{Reysh}{Yud}{Samekh}' (<- '{Reysh}{TsveyYudn}{Samekh}')
                    // -geshvigen => -shvayg
                    '{Shin}{TsveyVovn}{Yud}{Giml}' (<- '{Shin}{TsveyVovn}{TsveyYudn}{Giml}')
                    // -geshmisen => -shmays
                    '{Shin}{Mem}{Yud}{Samekh}' (<- '{Shin}{Mem}{TsveyYudn}{Samekh}')
                    // -geshniten => -shnayd
                    '{Shin}{Nun}{Yud}{Tes}' (<- '{Shin}{Nun}{TsveyYudn}{Dalet}')
                    // -geshriben => -shrayb
                    '{Shin}{Reysh}{Yud}{Beys}' (<- '{Shin}{Reysh}{TsveyYudn}{Beys}')
                    // -gebunden => -bind
                    '{Beys}{Vov}{Nun}{Dalet}' (<- '{Beys}{Yud}{Nun}{Dalet}')
                    // -gevuntshn => -vintsh
                    // (the stem is v-u-n-t-sh: the Nun belongs in both the
                    // pattern and the replacement)
                    '{TsveyVovn}{Vov}{Nun}{Tes}{Shin}' (<- '{TsveyVovn}{Yud}{Nun}{Tes}{Shin}')
                    // -gezungen => -zing
                    '{Zayen}{Vov}{Nun}{Giml}' (<- '{Zayen}{Yud}{Nun}{Giml}')
                    // -getrunken => -trink
                    '{Tes}{Reysh}{Vov}{Nun}{Kuf}' (<- '{Tes}{Reysh}{Yud}{Nun}{Kuf}')
                    // -getsvungen => -tsving
                    '{Tsadek}{TsveyVovn}{Vov}{Nun}{Giml}' (<- '{Tsadek}{TsveyVovn}{Yud}{Nun}{Giml}')
                    // -geshlungen => -shling
                    '{Shin}{Lamed}{Vov}{Nun}{Giml}' (<- '{Shin}{Lamed}{Yud}{Nun}{Giml}')
                    // -geboygen => -beyg
                    '{Beys}{VovYud}{Giml}' (<- '{Beys}{TsveyYudn}{Giml}')
                    // -gehoyben => -heyb
                    '{Hey}{VovYud}{Beys}' (<- '{Hey}{TsveyYudn}{Beys}')
                    // -farloyren => -farlir
                    '{Fey}{Alef}{Reysh}{Lamed}{VovYud}{Reysh}' (<- '{Fey}{Alef}{Reysh}{Lamed}{Yud}{Reysh}')
                    // -shtanen => -shtey
                    '{Shin}{Tes}{Alef}{Nun}' (<- '{Shin}{Tes}{TsveyYudn}')
                    // -geshvoyrn => -shver
                    '{Shin}{TsveyVovn}{VovYud}{Reysh}' (<- '{Shin}{TsveyVovn}{Ayen}{Reysh}')
                  )
                )

            // Verb/past participle ending: -t
            '{Tes}'
                ( R1 delete )

            // As well as noun/adjectives ending in -tn, -te, -ter, -ts so that the "-t" doesn't differentiate
            // Similarly for past participles: -tns, -tene, -tenem, -tener
            // If the Tes was before R1, we try to perform the same action while leaving the Tes in place
            '{Tes}{Nun}' '{Tes}{Ayen}' '{Tes}{Ayen}{Reysh}' '{Tes}{Samekh}'
            '{Tes}{Nun}{Samekh}' '{Tes}{Ayen}{Nun}{Ayen}' '{Tes}{Ayen}{Nun}{Ayen}{Mem}' '{Tes}{Ayen}{Nun}{Ayen}{Reysh}'
                ( ((R1 delete) or ( <- '{Tes}'))
                  // -(ge)brakht => -breng
                  // An unmarked Giml Ayen directly before the stem, if present,
                  // is included in the slice and so replaced along with it.
                  ['{Beys}{Reysh}{Alef}{Khof}' try '{Giml}{Ayen}'] <- '{Beys}{Reysh}{Ayen}{Nun}{Giml}'
                )

            // Past participles: -et, -etn, -ets, -ete, -eter
            '{Ayen}{Tes}' '{Ayen}{Tes}{Nun}' '{Ayen}{Tes}{Samekh}' '{Ayen}{Tes}{Ayen}' '{Ayen}{Tes}{Ayen}{Reysh}'
                ( R1 delete )

            // -geyn shorted to -gey
            '{Giml}{TsveyYudn}{Nun}'
                ( <- '{Giml}{TsveyYudn}')

            // ##################### Long list of irregular past participles
            // These replacements are applied without an R1 test.
            // Entries starting with 'GE' only match when mark_regions has
            // tagged the ge- prefix; the placeholder is consumed by the replacement.

            // -(ge)gangen (shortened to -gangen after prefixes) => -gey
            '{Giml}{Alef}{Nun}{Giml}{Ayen}{Nun}'
                ( <- '{Giml}{TsveyYudn}' )
            // -(ge)numen (shortened to -numen after prefixes) => -nem
            '{Nun}{Vov}{Mem}{Ayen}{Nun}'
                (<- '{Nun}{Ayen}{Mem}' )
            // -(ge)shribn (shortened to -shribn after prefixes) => -shrayb
            '{Shin}{Reysh}{Yud}{Beys}{Nun}'
                (<- '{Shin}{Reysh}{TsveyYudn}{Beys}' )
            // -gemiten => -mayd
            'GE{Mem}{Yud}{Tes}{Nun}'
                (<- '{Mem}{TsveyYudn}{Dalet}')
            // -gebiten => -bayt
            'GE{Beys}{Yud}{Tes}{Nun}'
                (<- '{Beys}{TsveyYudn}{Tes}')
            // -gebisen => -bays
            'GE{Beys}{Yud}{Samekh}{Nun}'
                ( <- '{Beys}{TsveyYudn}{Samekh}')
            // -gevizen => -vayz
            '{TsveyVovn}{Yud}{Zayen}{Nun}'
                ( <- '{TsveyVovn}{TsveyYudn}{Zayen}')
            // -getriben => -trayb
            '{Tes}{Reysh}{Yud}{Beys}{Nun}'
                ( <- '{Tes}{Reysh}{TsveyYudn}{Beys}')
            // -geliten => -layt
            'GE{Lamed}{Yud}{Tes}{Nun}'
                ( <- '{Lamed}{TsveyYudn}{Tes}')
            // -gekliben => -klayb
            '{Kuf}{Lamed}{Yud}{Beys}{Nun}'
                ( <- '{Kuf}{Lamed}{TsveyYudn}{Beys}')
            // -geriben => -rayb
            '{Reysh}{Yud}{Beys}{Nun}'
                ( <- '{Reysh}{TsveyYudn}{Beys}')
            // -gerisen => -rays
            'GE{Reysh}{Yud}{Samekh}{Nun}'
                ( <- '{Reysh}{TsveyYudn}{Samekh}')
            // -geshvigen => -shvayg
            '{Shin}{TsveyVovn}{Yud}{Giml}{Nun}'
                ( <- '{Shin}{TsveyVovn}{TsveyYudn}{Giml}')
            // -geshmisen => -shmays
            '{Shin}{Mem}{Yud}{Samekh}{Nun}'
                ( <- '{Shin}{Mem}{TsveyYudn}{Samekh}')
            // -geshniten => -shnayd
            '{Shin}{Nun}{Yud}{Tes}{Nun}'
                ( <- '{Shin}{Nun}{TsveyYudn}{Dalet}')
            // -gebunden => -bind
            '{Beys}{Vov}{Nun}{Dalet}{Nun}'
                ( <- '{Beys}{Yud}{Nun}{Dalet}')
            // -gevuntshn => -vintsh
            // (pattern and replacement both carry the Nun of the stem)
            '{TsveyVovn}{Vov}{Nun}{Tes}{Shin}{Nun}'
                ( <- '{TsveyVovn}{Yud}{Nun}{Tes}{Shin}')
            // -gezungen => -zing
            '{Zayen}{Vov}{Nun}{Giml}{Nun}'
                ( <- '{Zayen}{Yud}{Nun}{Giml}')
            // -getrunken => -trink
            '{Tes}{Reysh}{Vov}{Nun}{Kuf}{Nun}'
                ( <- '{Tes}{Reysh}{Yud}{Nun}{Kuf}')
            // -getsvungen => -tsving
            '{Tsadek}{TsveyVovn}{Vov}{Nun}{Giml}{Nun}'
                ( <- '{Tsadek}{TsveyVovn}{Yud}{Nun}{Giml}')
            // -geshlungen => -shling
            '{Shin}{Lamed}{Vov}{Nun}{Giml}{Nun}'
                ( <- '{Shin}{Lamed}{Yud}{Nun}{Giml}')
            // -geboygen => -beyg
            '{Beys}{VovYud}{Giml}{Nun}'
                ( <- '{Beys}{TsveyYudn}{Giml}')
            // -gehoyben => -heyb
            '{Hey}{VovYud}{Beys}{Nun}'
                ( <- '{Hey}{TsveyYudn}{Beys}')
            // -farloyren => -farlir
            '{Fey}{Alef}{Reysh}{Lamed}{VovYud}{Reysh}{Nun}'
                ( <- '{Fey}{Alef}{Reysh}{Lamed}{Yud}{Reysh}')
            // -shtanen => -shtey
            '{Shin}{Tes}{Alef}{Nun}{Ayen}{Nun}'
                ( <- '{Shin}{Tes}{TsveyYudn}')
            // -geshvoyrn => -shver
            '{Shin}{TsveyVovn}{VovYud}{Reysh}{Nun}'
                ( <- '{Shin}{TsveyVovn}{Ayen}{Reysh}')
            // -(ge)brakht (shortened to -brakht after prefixes) => -breng
            '{Beys}{Reysh}{Alef}{Khof}{Tes}'
                (<- '{Beys}{Reysh}{Ayen}{Nun}{Giml}' )
            // ###### End of irregular past participles

            // Noun endings: -ung, -hayt, -kayt, -ikayt, -shaft
            '{Vov}{Nun}{Giml}' '{Hey}{TsveyYudn}{Tes}' '{Kuf}{TsveyYudn}{Tes}' '{Yud}{Kuf}{TsveyYudn}{Tes}' '{Shin}{Alef}{Fey}{Tes}'
                ( R1 delete )

            // Noun endings: -izm, izmen
            '{Yud}{Zayen}{Mem}' '{Yud}{Zayen}{Mem}{Ayen}{Nun}'
                ( R1 delete )

            // Plural ending: -im
            '{Yud}{Mem}'
                ( R1 delete )

            // Plural ending: -os (Hebraic), replace with -h
            '{Vov}{Sof}'
                ( R1 <- '{Hey}' )

            // Diminutive endings: -elekh, -ele, -lekh, -eles, -elen
            '{Ayen}{Lamed}{Ayen}{Khof}' '{Ayen}{Lamed}{Ayen}' '{Lamed}{Ayen}{Khof}' '{Ayen}{Lamed}{Ayen}{Samekh}' '{Ayen}{Lamed}{Ayen}{Nun}'
                ( R1 delete )

            // Noun ending: -ist
            '{Yud}{Samekh}{Tes}'
                (
                    // Exceptions: -gist, -shist
                    // Here only the final Tes is dropped (-ist becomes -is),
                    // and only if the relaxed R1plus3 test passes.
                    ( ('{Giml}' or '{Shin}') try (R1plus3 <- '{Yud}{Samekh}') )
                    or
                    ( R1 delete )
                )

            // Noun ending: -istn
            '{Yud}{Samekh}{Tes}{Nun}'
                ( R1 delete )

            // Verb ending: -stu
            '{Samekh}{Tes}{Vov}'
                ( R1 delete )

            // Superlative ending: -ster, -ste, -stn
            '{Samekh}{Tes}{Ayen}{Reysh}' '{Samekh}{Tes}{Ayen}' '{Samekh}{Tes}{Nun}'
                ( R1 delete )

            // Ambiguous verb ending: -st
            '{Samekh}{Tes}'
                ( R1 delete )
        )
    )

    // ---------------------------------------------------------------
    // Second pass: noun endings exposed by the first pass, and the
    // diminutive -l.
    // ---------------------------------------------------------------
    do (
        [substring] among(
            // Noun endings: -ung, -hayt, -kayt, -ikayt, -shaft
            '{Vov}{Nun}{Giml}' '{Hey}{TsveyYudn}{Tes}' '{Kuf}{TsveyYudn}{Tes}' '{Yud}{Kuf}{TsveyYudn}{Tes}' '{Shin}{Alef}{Fey}{Tes}'
                ( R1 delete )

            // Diminutive endings: -l
            // Removed only when the letter before it is a consonant; the
            // consonant is tested but stays in the word.
            '{Lamed}'
                ( R1 consonant delete )
        )
    )

    // ---------------------------------------------------------------
    // Third pass: adjective and present participle endings.
    // ---------------------------------------------------------------
    do (
        [substring] among(
            // Adjective endings: -ig, -ik, -ish, -nik, -dik
            '{Yud}{Giml}' '{Yud}{Kuf}' '{Yud}{Shin}' '{Nun}{Yud}{Kuf}' '{Dalet}{Yud}{Kuf}'
                ( R1 delete )

            // Exceptions to above: -blik, -glik
            '{Beys}{Lamed}{Yud}{Kuf}' '{Giml}{Lamed}{Yud}{Kuf}'
                ( true )

            // Present participle endings: -ndik
            '{Nun}{Dalet}{Yud}{Kuf}'
                ( R1 delete )

            // Present participle ending: -endik. The whole ending is removed,
            // whatever precedes it.
            '{Ayen}{Nun}{Dalet}{Yud}{Kuf}'
                ( R1 delete )
        )
    )

    // Finally remove every 'GE' / 'TSU' placeholder left in the word.
    do (repeat goto ( ['GE' or 'TSU'] delete ))
)
)
// Entry point of the stemmer.
// Each stage is wrapped in `do`, so a stage that signals failure does not
// stop the stages after it:
//   1. prelude          - spelling normalisation (forwards)
//   2. mark_regions     - locate P1 and tag ge-/tsu- prefixes (forwards)
//   3. standard_suffix  - suffix removal (backwards from the end of the word)
define stem as (
    do prelude
    do mark_regions
    backwards (
        do standard_suffix
    )
)