An Armenian stemming algorithm

Links to resources

A stemmer for Armenian was sent to us by Astghik Mkrtchyan with this accompanying email:

From: astghik mkrtchyan <mkrtch.ast@mail.ru>
Date: Sat, 12 Jun 2010 20:27:02 +0400
Subject: Armenian Stemmer

Hello,

I newbie here. Recently I've googled for Armenian stemmer. So I have found
nothing, decided to write one. And now I'm sending Armenian stemmer
(stem_Unicode.sbl) and generated java file.

Regards,
Astghik

The algorithm in Snowball

// Inside string literals, {name} is replaced by the string defined for name.
stringescapes {}

// One named string per Armenian lower-case letter, given by its hexadecimal
// code point. The trailing comment on each line is the code point of the
// corresponding capital letter (always hex 30 lower). The names are Latin
// transliterations chosen by the author.
stringdef a    hex '561' // 531
stringdef b    hex '562' // 532
stringdef g    hex '563' // 533
stringdef d    hex '564' // 534
stringdef ye   hex '565' // 535
stringdef z    hex '566' // 536
stringdef e    hex '567' // 537
stringdef y    hex '568' // 538
stringdef dt   hex '569' // 539
stringdef zh   hex '56A' // 53A
stringdef i    hex '56B' // 53B
stringdef l    hex '56C' // 53C
stringdef kh   hex '56D' // 53D
stringdef ts   hex '56E' // 53E
stringdef k    hex '56F' // 53F
stringdef h    hex '570' // 540
stringdef dz   hex '571' // 541
stringdef gh   hex '572' // 542
stringdef djch hex '573' // 543
stringdef m    hex '574' // 544
stringdef j    hex '575' // 545
stringdef n    hex '576' // 546
stringdef sh   hex '577' // 547
stringdef vo   hex '578' // 548
stringdef ch   hex '579' // 549
stringdef p    hex '57A' // 54A
stringdef dj   hex '57B' // 54B
stringdef r    hex '57C' // 54C
stringdef s    hex '57D' // 54D
stringdef v    hex '57E' // 54E
stringdef t    hex '57F' // 54F
stringdef r'   hex '580' // 550
stringdef c    hex '581' // 551
stringdef u    hex '582' // 552                  //vjun
stringdef bp   hex '583' // 553
stringdef q    hex '584' // 554
stringdef ev   hex '587' // ligature with no single capital code point; not used by any routine visible in this file
stringdef o    hex '585' // 555
stringdef f    hex '586' // 556

// Internal routines; each one is defined further down in this file.
routines (
    mark_regions
    R2
    adjective
    verb
    noun
    ending
)

// The only routine exported to callers.
externals ( stem )

// Cursor positions computed by mark_regions:
//   pV - left boundary that stem imposes on all suffix removal
//   p2 - start of the region tested by R2
integers (
    pV
    p2
)

// Character class used by mark_regions to find pV and p2.
groupings ( v )

// Members of v, written with the string definitions above; {u} is the
// letter tagged vjun there.
define v '{a}{e}{i}{o}{u}{ye}{vo}{y}'

// Compute the marks pV and p2 for the current word.
//
// Both marks start out at the end of the word, so a word that is too short
// to contain the patterns below ends up with empty regions.
define mark_regions as (
    $pV = limit
    $p2 = limit

    // The scan is wrapped in do, so running out of characters part-way is
    // not an error: marks already set are kept, the others stay at limit.
    do (
        // pV: just past the first character that belongs to v.
        gopast v
        setmark pV

        // p2: continue past the next non-v character, then past the next
        // v character, then past the next non-v character.
        gopast non-v
        gopast v
        gopast non-v
        setmark p2
    )
)

backwardmode (

    // Succeeds only when the cursor is at or to the right of p2.
    define R2 as ( $p2 <= cursor )

    // Remove one adjectival suffix from the end of the word.
    //
    // substring selects the longest entry that matches, so the listing
    // order has no effect on the result; entries are grouped longest first.
    // The brackets mark the matched suffix as the slice, and the single
    // shared action deletes it.
    define adjective as (
        [substring] among (
            // five characters
            '{ye}{r'}{vo}{r'}{d}'

            // four characters
            '{vo}{r'}{e}{n}'
            '{vo}{v}{i}{n}'
            '{l}{a}{j}{n}'
            '{r'}{vo}{r'}{d}'
            '{a}{k}{a}{n}'
            '{ye}{k}{ye}{n}'
            '{vo}{r'}{a}{k}'
            '{v}{vo}{u}{n}'
            '{ye}{r'}{ye}{n}'
            '{a}{r'}{a}{n}'
            '{a}{v}{ye}{t}'

            // three characters
            '{b}{a}{r'}'
            '{p}{ye}{s}'
            '{a}{k}{i}'
            '{a}{l}{i}'
            '{k}{vo}{t}'
            '{g}{i}{n}'

            // two characters
            '{ye}{gh}'
            '{ye}{n}'
            '{i}{v}'
            '{a}{t}'
            '{i}{n}'

            (delete)
        )
    )

    // Remove one verbal suffix from the end of the word.
    //
    // substring selects the longest entry that matches, so the listing
    // order has no effect on the result; entries are grouped longest first.
    // The brackets mark the matched suffix as the slice, and the single
    // shared action deletes it.
    define verb as (
        [substring] among (
            // six characters
            '{v}{ye}{c}{i}{n}{q}'
            '{a}{c}{r'}{i}{n}{q}'

            // five characters
            '{ye}{l}{vo}{u}{c}'
            '{a}{l}{vo}{u}{c}'
            '{a}{c}{n}{a}{l}'
            '{ye}{c}{n}{ye}{l}'
            '{k}{vo}{t}{ye}{l}'
            '{ye}{c}{v}{ye}{l}'
            '{a}{c}{v}{ye}{l}'
            '{ye}{c}{i}{n}{q}'
            '{a}{c}{i}{n}{q}'
            '{v}{ye}{c}{i}{r'}'
            '{v}{ye}{c}{i}{q}'
            '{v}{ye}{c}{i}{n}'
            '{a}{c}{r'}{i}{r'}'
            '{a}{c}{r'}{ye}{c}'
            '{a}{c}{r'}{i}{q}'
            '{a}{c}{r'}{i}{n}'
            '{a}{c}{a}{n}{q}'

            // four characters
            '{v}{vo}{u}{m}'
            '{a}{l}{vo}{u}'
            '{ye}{l}{vo}{u}'
            '{a}{n}{a}{l}'
            '{a}{l}{vo}{v}'
            '{ye}{l}{vo}{v}'
            '{a}{l}{i}{s}'
            '{ye}{l}{i}{s}'
            '{ye}{n}{a}{l}'
            '{c}{n}{ye}{l}'
            '{a}{t}{ye}{l}'
            '{vo}{t}{ye}{l}'
            '{ye}{c}{i}{r'}'
            '{a}{c}{i}{r'}'
            '{ye}{c}{i}{q}'
            '{a}{c}{i}{q}'
            '{ye}{c}{i}{n}'
            '{a}{c}{i}{n}'
            '{a}{c}{a}{r'}'
            '{a}{c}{a}{v}'
            '{a}{c}{a}{q}'
            '{a}{c}{a}{n}'
            '{v}{ye}{c}{i}'
            '{a}{c}{r'}{i}'
            '{ye}{c}{a}{r'}'
            '{ye}{c}{a}{v}'
            '{c}{a}{n}{q}'

            // three characters
            '{vo}{u}{m}'
            '{v}{ye}{l}'
            '{y}{a}{l}'
            '{y}{ye}{l}'
            '{n}{ye}{l}'
            '{t}{ye}{l}'
            '{v}{a}{ts}'
            '{c}{a}{q}'
            '{c}{a}{n}'
            '{a}{c}{a}'
            '{a}{c}{i}'
            '{ye}{c}{a}'
            '{ch}{ye}{l}'
            '{ye}{c}{i}'
            '{a}{n}{q}'

            // two characters
            '{a}{r'}'
            '{a}{v}'
            '{a}{q}'
            '{a}{n}'
            '{a}{l}'
            '{ye}{l}'
            '{ye}{c}'
            '{a}{c}'
            '{v}{ye}'

            // one character
            '{a}'

            (delete)
        )
    )

    // Remove one noun-forming suffix from the end of the word.
    //
    // substring selects the longest entry that matches, so the listing
    // order has no effect on the result; entries are grouped longest first.
    // The brackets mark the matched suffix as the slice, and the single
    // shared action deletes it.
    define noun as (
        [substring] among (
            // seven characters
            '{vo}{u}{dt}{j}{vo}{u}{n}'

            // five characters
            '{m}{vo}{u}{n}{q}'

            // four characters
            '{a}{n}{a}{k}'
            '{a}{n}{o}{c}'
            '{a}{r'}{a}{n}'
            '{s}{t}{a}{n}'
            '{ye}{gh}{e}{n}'
            '{j}{vo}{u}{n}'
            '{v}{a}{ts}{q}'
            '{a}{v}{vo}{r'}'
            '{vo}{u}{h}{i}'
            '{vo}{u}{j}{dt}'
            '{vo}{u}{j}{q}'
            '{vo}{u}{s}{t}'
            '{a}{l}{i}{q}'
            '{a}{n}{i}{q}'
            '{vo}{u}{n}{q}'

            // three characters
            '{a}{ts}{vo}'
            '{a}{r'}{q}'
            '{p}{a}{n}'
            '{ye}{n}{q}'
            '{j}{a}{k}'
            '{vo}{n}{q}'
            '{vo}{r'}{d}'
            '{ch}{ye}{q}'
            '{v}{vo}{r'}'
            '{vo}{u}{k}'
            '{vo}{u}{s}'
            '{i}{ch}{q}'
            '{g}{a}{r'}'

            // two characters
            '{i}{k}'
            '{i}{ch}'
            '{i}{q}'
            '{vo}{c}'
            '{c}{i}'
            '{i}{l}'
            '{vo}{u}'
            '{a}{k}'
            '{a}{n}'

            // one character
            '{q}'

            (delete)
        )
    )

    // Remove one inflectional ending, provided it lies inside region R2.
    //
    // substring first selects the longest entry that matches at the end of
    // the word and the brackets mark it as the slice; only then is R2
    // tested, with the cursor sitting at the left edge of that match. If
    // the test fails the routine fails and nothing is removed. Listing
    // order has no effect on the result; entries are grouped longest first.
    define ending as (
        [substring] R2 among (
            // seven characters
            '{vo}{u}{dt}{j}{a}{n}{n}'
            '{vo}{u}{dt}{j}{a}{n}{y}'
            '{vo}{u}{dt}{j}{a}{n}{s}'
            '{vo}{u}{dt}{j}{a}{n}{d}'

            // six characters
            '{vo}{u}{dt}{j}{a}{n}'
            '{n}{ye}{r'}{vo}{u}{m}'

            // five characters
            '{n}{ye}{r'}{i}{c}'
            '{n}{ye}{r'}{i}{n}'
            '{n}{ye}{r'}{vo}{v}'
            '{ye}{r'}{vo}{u}{m}'
            '{a}{n}{vo}{u}{m}'
            '{v}{a}{n}{i}{c}'

            // four characters
            '{n}{ye}{r'}{y}'
            '{n}{ye}{r'}{n}'
            '{n}{ye}{r'}{i}'
            '{n}{ye}{r'}{d}'
            '{ye}{r'}{i}{c}'
            '{ye}{r'}{i}{n}'
            '{ye}{r'}{vo}{v}'
            '{v}{a}{n}{s}'
            '{v}{a}{n}{y}'
            '{v}{a}{n}{d}'
            '{vo}{dj}{i}{c}'
            '{a}{n}{vo}{v}'

            // three characters
            '{ye}{r'}{i}'
            '{ye}{r'}{d}'
            '{ye}{r'}{n}'
            '{ye}{r'}{y}'
            '{vo}{u}{n}'
            '{vo}{u}{d}'
            '{a}{n}{y}'
            '{a}{n}{d}'
            '{v}{a}{n}'
            '{vo}{dj}{y}'
            '{vo}{dj}{s}'
            '{vo}{dj}{d}'
            '{vo}{u}{c}'
            '{c}{i}{c}'
            '{v}{i}{c}'
            '{v}{vo}{v}'
            '{a}{m}{b}'
            '{n}{ye}{r'}'

            // two characters
            '{i}{n}'
            '{s}{a}'
            '{vo}{dj}'
            '{i}{c}'
            '{vo}{c}'
            '{v}{i}'
            '{vo}{v}'
            '{a}{n}'
            '{ye}{r'}'
            '{v}{a}'

            // one character
            '{y}'
            '{n}'
            '{d}'
            '{c}'
            '{i}'

            (delete)
        )
    )
)

// Entry point: strip suffixes from the current word in place.
define stem as (
    // Set pV and p2. Wrapped in do, so stemming continues either way.
    do mark_regions

    // Process the word from its end, with pV as the leftmost position any
    // routine may reach; nothing to the left of pV is ever removed.
    backwards
    setlimit tomark pV
    for (
        // Four passes, always in this order. Each is wrapped in do, so a
        // pass that fails does not stop the ones after it.
        do ending
        do verb
        do adjective
        do noun
    )
)