New Additions/Updates:
boundless - educate - edutalab - empatico - es-ebooks - es16 - fr16 - fsfiles - hesperian - solidaria - wikipediaforschools
- wikipediaforschoolses - wikipediaforschoolsfr - wikipediaforschoolspt - worldmap -

See also: Liber Liber - Libro Parlato - Liber Musica  - Manuzio -  Liber Liber ISO Files - Alphabetical Order - Multivolume ZIP Complete Archive - PDF Files - OGG Music Files -

PROJECT GUTENBERG HTML: Volume I - Volume II - Volume III - Volume IV - Volume V - Volume VI - Volume VII - Volume VIII - Volume IX

Ascolta "Volevo solo fare un audiolibro" su Spreaker.
CLASSICISTRANIERI HOME PAGE - YOUTUBE CHANNEL
Privacy Policy Cookie Policy Terms and Conditions
Участник:Maksim-e/zatomobot language.py — Википедия

Участник:Maksim-e/zatomobot language.py

Материал из Википедии — свободной энциклопедии

# -*- coding: utf-8  -*-


class AtomobotLanguageError( Exception ):
    """Exception class for the Atomobot language code.

    It adds nothing to ``Exception``; it is not raised by the code shown
    alongside it, so it is presumably meant for callers.
    """



class LanguageText( object ):
    """A piece of text bound to a language object and compared by its collation.

    The text is decomposed once, at construction time, through
    ``lang.decompose()``.  Hashing and every comparison operate on that
    tuple of collation numbers, not on the raw string.

    Comparisons accept another ``LanguageText`` or a plain string (which is
    decomposed with this object's language).  Any other operand raises
    ``TypeError`` -- this includes ``==`` and ``!=``.
    """

    # ``basestring`` exists only on Python 2; fall back to ``str`` elsewhere.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str


    def __init__( self, lang, text ):
        """Store *lang* and *text* and cache the decomposed form of *text*.

        lang -- object providing ``decompose( text )`` and a ``code`` attribute
        text -- the string to wrap
        """
        self.lang = lang
        self.text = text
        self.decomposed = tuple( self.lang.decompose( text ) )


    def __str__( self ):
        """Return the wrapped text unchanged."""
        return self.text


    def __repr__( self ):
        """Return ``LanguageText('<lang code>', <repr of text>)``."""
        return "LanguageText('%s', %s)" % ( self.lang.code, repr( self.text ) )


    def __hash__( self ):
        """Hash the decomposed form, so equal texts share a hash."""
        return hash( self.decomposed )


    def _compare( self, other ):
        """Return -1, 0 or 1 for this text ordered against *other*.

        Raises TypeError when *other* is neither a LanguageText nor a string.
        """
        if isinstance( other, LanguageText ):
            theirs = other.decomposed
        elif isinstance( other, self._string_types ):
            # Plain strings are interpreted in this object's language.
            theirs = tuple( self.lang.decompose( other ) )
        else:
            raise TypeError(
                'cannot compare LanguageText with %s' % type( other ).__name__ )
        mine = self.decomposed
        # Portable replacement for the Python 2 builtin cmp().
        return ( mine > theirs ) - ( mine < theirs )


    def __cmp__( self, other ):
        """Three-way comparison; only consulted by Python 2."""
        return self._compare( other )


    # Rich comparisons: Python 3 ignores __cmp__, so without these ``==``
    # would fall back to identity and ordering would not work at all.

    def __eq__( self, other ):
        return self._compare( other ) == 0


    def __ne__( self, other ):
        return self._compare( other ) != 0


    def __lt__( self, other ):
        return self._compare( other ) < 0


    def __le__( self, other ):
        return self._compare( other ) <= 0


    def __gt__( self, other ):
        return self._compare( other ) > 0


    def __ge__( self, other ):
        return self._compare( other ) >= 0



class AtomobotLanguage( object ):
    """Base class holding a language's collation table and plural forms.

    Letters (collation units, possibly several characters long such as
    ``CH``) are registered in collation order with ``add_letter()``; each
    receives the next integer.  After registration ``tidy_letters()`` must
    be called to build the lookup used by ``decompose()``.
    """

    # Maps a name to a 6-item sequence of forms, indexed by count 0..4 and
    # "5 or more" (see plural()).  Subclasses override this.
    plurals = {}


    def __init__( self ):
        # Last collation number handed out; 0 is reserved for "unknown".
        self.number = 0
        # Index = collation number, value = letter; slot 0 stands for unknown.
        self.letters = [ u'?' ]
        # letter -> collation number
        self.number_by_letter = {}
        # first character -> { letter length -> [ letters ] }
        self.cgroups_by_first = {}
        # first character -> [ letters ], longest first (built by tidy_letters)
        self.letters_by_first = {}
        # character type name -> set of letters of that type
        self.ch_types = {}


    def new_number( self ):
        """Advance the collation counter and return its new value."""
        self.number += 1
        return self.number


    def plural( self, number, name ):
        """Return the form of *name* that goes with the count *number*.

        *name* is returned unchanged when no forms are registered for it.
        The sign of *number* is ignored; counts of 5 and above share form 5.
        """
        forms = self.plurals.get( name, None )
        if not forms:
            return name
        number = abs( number )
        if number >= 5:
            return forms[5]
        return forms[ number ]


    def add_letter( self, letter, ch_type='letter' ):
        """Register *letter* as the next collation unit.

        letter  -- non-empty string of one or more characters
        ch_type -- type name under which the letter is recorded in ch_types
        """
        number = self.new_number()
        self.letters.append( letter )
        self.number_by_letter[ letter ] = number
        cgroups = self.cgroups_by_first.setdefault( letter[0], {} )
        samelets = cgroups.setdefault( len( letter ), [] )
        samelets.append( letter )
        self.ch_types.setdefault( ch_type, set() ).add( letter )


    def tidy_letters( self ):
        """Build letters_by_first from the registered letters.

        For every first character the candidate letters are listed longest
        first, so decompose() prefers ``CH`` over ``C``.  Each list is
        rebuilt from scratch, so calling this again does not duplicate
        entries.
        """
        # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for first, cgroups in self.cgroups_by_first.items():
            ordered = []
            for size in sorted( cgroups, reverse=True ):
                ordered.extend( cgroups[ size ] )
            self.letters_by_first[ first ] = ordered


    def decompose( self, text ):
        """Return the list of collation numbers for *text*, ignoring case.

        The text is upper-cased and scanned left to right; at each position
        the longest registered letter that matches is consumed.  A character
        that starts no matching letter yields 0 and is skipped.
        """
        text = text.upper()
        decomposed = []
        pos = 0
        length = len( text )
        while pos < length:
            candidates = self.letters_by_first.get( text[ pos ], () )
            for candidate in candidates:
                end = pos + len( candidate )
                # Compare the whole candidate, not just its second character.
                if text[ pos:end ] == candidate:
                    decomposed.append( self.number_by_letter[ candidate ] )
                    pos = end
                    break
            else:
                # Nothing matched here: emit "unknown" and always advance,
                # otherwise the scan would never terminate.
                decomposed.append( 0 )
                pos += 1
        return decomposed


    def compose( self, decotext ):
        """Turn a sequence of collation numbers back into (upper-case) text."""
        return u''.join( [ self.letters[ number ] for number in decotext ] )


    def compare_ci( self, text1, text2 ):
        """Compare two strings case-insensitively by collation order.

        Returns -1, 0 or 1 when *text1* sorts before, equal to or after
        *text2*.
        """
        dec1 = self.decompose( text1 )
        dec2 = self.decompose( text2 )
        # Portable replacement for the Python 2 builtin cmp().
        return ( dec1 > dec2 ) - ( dec1 < dec2 )




class AtomobotLanguageSlovak( AtomobotLanguage ):
    """Slovak language data: plural forms, month names, number layout, collation."""

    code = 'sk'

    # Six forms per name, looked up by the inherited plural().
    plurals = {
        'článok': ( u'článkov', u'článok', u'články', u'články', u'články', u'článkov' ),
        'kategória': ( u'kategórií', u'kategória', u'kategórie', u'kategórie', u'kategórie', u'kategórií' ),
        }

    TEXT_UPDATE = u'Atomobot :: aktualizácia'

    # Month number (1-12) -> month name.
    MONTH_NAME = dict( enumerate( (
        u'január', u'február', u'marec', u'apríl', u'máj', u'jún', u'júl',
        u'august', u'september', u'október', u'november', u'december',
        ), 1 ) )

    # Month number (1-12) -> genitive form of the month name.
    MONTH_NAME_GEN = dict( enumerate( (
        u'januára', u'februára', u'marca', u'apríla', u'mája', u'júna', u'júla',
        u'augusta', u'septembra', u'októbra', u'novembra', u'decembra',
        ), 1 ) )


    def __init__( self ):
        """Set up the base tables, then register and index the Slovak letters."""
        super( AtomobotLanguageSlovak, self ).__init__()
        self.init_collated_letters()
        self.tidy_letters()


    def format_number( self, number, places=0 ):
        """Format *number* with spaces between thousands and a decimal comma.

        number -- anything float() accepts
        places -- number of digits after the decimal comma (default 0)

        Returns a string such as ``-1 234 567,50``.
        """
        rendered = ( '%%.%sf' % places ) % float( number )
        whole, _dot, fraction = rendered.partition( '.' )
        negative = whole.startswith( '-' )
        if negative:
            whole = whole[1:]
        # Cut the integer part into groups of three, counted from the right;
        # the leftmost group takes whatever is left over.
        lead = len( whole ) % 3
        groups = []
        if lead:
            groups.append( whole[ :lead ] )
        for start in range( lead, len( whole ), 3 ):
            groups.append( whole[ start:start + 3 ] )
        result = ' '.join( groups )
        if negative:
            result = '-' + result
        if fraction:
            result += ',' + fraction
        return result


    def init_collated_letters( self ):
        """Register symbols, digits and letters in collation order.

        The order of registration defines the sort order, so the sequences
        below must not be rearranged.  ``DZ``, ``DŽ`` and ``CH`` are
        registered as single two-character letters.
        """
        symbols = (
            u' ', u'!', u'"', u'#', u'$', u'.', u',', u'-', u'(', u')',
            )
        digits = u'0123456789'
        alphabet = (
            u'A', u'Á', u'Â', u'Ä', u'Å', u'Æ',
            u'B',
            u'C', u'Ć', u'Č', u'Ç',
            u'D', u'Ď', u'DZ', u'DŽ', u'Ð',
            u'E', u'É', u'Ě', u'Ę',
            u'F',
            u'G',
            u'H', u'CH',
            u'I', u'Ì', u'Í', u'Î', u'Ï', u'Į',
            u'J',
            u'K',
            u'L', u'Ĺ', u'Ľ', u'Ł',
            u'M',
            u'N', u'Ń', u'Ň', u'Ñ',
            u'O', u'Ó', u'Ô', u'Ö', u'Ő', u'Œ', u'Ø',
            u'P',
            u'Q',
            u'R', u'Ŕ', u'Ř',
            u'S', u'Ś', u'Š', u'Ş',
            u'T', u'Ť',
            u'U', u'Ú', u'Ů', u'Ü', u'Ű',
            u'V',
            u'W',
            u'X',
            u'Y', u'Ý',
            u'Z', u'Ź', u'Ž',
            )
        for symbol in symbols:
            self.add_letter( symbol, 'symbol' )
        for digit in digits:
            self.add_letter( digit, 'number' )
        for letter in alphabet:
            self.add_letter( letter )




 

Static Wikipedia (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2007 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2006 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu

Static Wikipedia February 2008 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu