New Additions/Updates:
boundless - educate - edutalab - empatico - es-ebooks - es16 - fr16 - fsfiles - hesperian - solidaria - wikipediaforschools
- wikipediaforschoolses - wikipediaforschoolsfr - wikipediaforschoolspt - worldmap -

See also: Liber Liber - Libro Parlato - Liber Musica  - Manuzio -  Liber Liber ISO Files - Alphabetical Order - Multivolume ZIP Complete Archive - PDF Files - OGG Music Files -

PROJECT GUTENBERG HTML: Volume I - Volume II - Volume III - Volume IV - Volume V - Volume VI - Volume VII - Volume VIII - Volume IX

Ascolta "Volevo solo fare un audiolibro" su Spreaker.
CLASSICISTRANIERI HOME PAGE - YOUTUBE CHANNEL
Privacy Policy Cookie Policy Terms and Conditions
Участник:Maksim-e/zperevod.py — Википедия

Участник:Maksim-e/zperevod.py

Материал из Википедии — свободной энциклопедии

# -*- coding: UTF-8 -*-

__version__='$Id:'

import wikipedia
import re, sys
import codecs
from zbib_tekst import *

def vivod(b):
    """Debug-output hook; at present it discards *b* and does nothing.

    The hook can be pointed at a real sink by restoring one of the
    disabled variants kept below.
    """
    # Disabled sinks:
    #    wikipedia.output(b)
    #    otch.write(b); otch.flush()
    return None

class Sl_sta:
    """One dictionary entry: everything known about a single source word."""

    def __init__(self,vihs):
        # tp   -- list of translations, seeded with the first one
        # iwi  -- translations that were flagged with `fiwi` when added
        # toss -- (tag, translation) pairs remembered for tagged additions
        # riwi -- flag, set to 1 when a translation was added with fiwi==2
        self.tp = [vihs]
        self.iwi = []
        self.toss = []
        self.riwi = 0

def dobavslov2(slov,vhjaz,vihjaz,vhs,vihs,fiwi,fltolk,oss):
    """Record translation *vihs* for the source word *vhs* in *slov*.

    slov   -- dict mapping (source language, source word) to Sl_sta entries;
              it is modified in place.
    vhjaz  -- source language code (first half of the key).
    vihjaz -- target language code; accepted but not used by this function.
    vhs    -- source word, used verbatim as the second half of the key.
    vihs   -- translation to record.
    fiwi   -- when true the translation is also stored in the entry's `iwi`
              list; the value 2 additionally sets the entry's `riwi` flag.
    fltolk -- merge mode for a word that already has an entry:
              2 = leave the existing entry completely untouched,
              1 = replace its translation list with [vihs],
              anything else = append vihs unless it is already listed.
    oss    -- optional tag; when non-empty the pair (oss, vihs) is added to
              the entry's `toss` list (once).
    """
    kl=(vhjaz,vhs)

    # Leftover debugging trace for words starting with "Provinci": prints the
    # arguments and the sizes of the existing entry's lists (-1 when the word
    # is not in the dictionary yet).
    if vhs.startswith(u'Provinci'):
        a=-1
        b=-1
        if kl in slov:
            a=len(slov[kl].tp)
            b=len(slov[kl].iwi)
        wikipedia.output(u'2 %s %s  %s  %d %d  %d %d'%(vhs,vihs,oss,fiwi,fltolk,a,b))

    if kl in slov:
        sta=slov[kl]
        if fltolk==2:
            # "only if new" mode: an existing entry wins, nothing is recorded
            return
        if fltolk==1:
            sta.tp=[vihs]
        elif vihs not in sta.tp:
            sta.tp.append(vihs)
        if fiwi and vihs not in sta.iwi:
            sta.iwi.append(vihs)
    else:
        sta=Sl_sta(vihs)
        slov[kl]=sta
        if fiwi:
            sta.iwi.append(vihs)

    if oss and (oss,vihs) not in sta.toss:
        sta.toss.append((oss,vihs))

    if fiwi==2:
        sta.riwi=1

def eo_pluralo(t):
    """Return the plural of the Esperanto phrase *t*, or u'' if none is formed.

    The head of the phrase is the part before the first of the prepositions
    listed below (the whole phrase when none occurs).  A plural is built only
    when that head ends in 'a' or 'o': every "a " inside the head becomes
    "aj " and a 'j' is appended to the head; the rest is kept unchanged.
    """
    predlogi=[u' de ',u' en ',u' el ',u' al ',u' je ',u' por ']
    # NOTE(review): iskats_mn comes from zbib_tekst; it is used here as
    # "position of the first listed substring, -1 if none" -- confirm there.
    (kon,nomer)=iskats_mn(t,0,predlogi)
    if kon==-1:
        kon=len(t)
    if kon<=0:
        return u''
    if t[kon-1] not in [u'a',u'o']:
        return u''
    golova=t[:kon].replace(u'a ',u'aj ')
    return golova+'j'+t[kon:]

# English consonant letters; used to decide whether a verb's final letter may
# be doubled before -ed / -ing ('y' is deliberately not included).
en_soglasn=list(u'bcdfghjklmnpqrstvwxz')

def fleksii_en_eo_glag(slov,vhs,osneo,fltolk,oss):
    """Add English verb inflections of *vhs* with their Esperanto forms.

    slov   -- dictionary to extend (passed on to dobavslov2).
    vhs    -- English base form; must not be empty.
    osneo  -- Esperanto stem; -as, -is and -anta are attached to it.
    fltolk -- merge mode handed to dobavslov2 unchanged.
    oss    -- tag handed to dobavslov2 unchanged.

    Candidate English spellings are generated generously (for example both
    "-ies" and "-ys" for a word in -y, and a doubled final consonant before
    -ed / -ing); every candidate is registered.
    """
    okon=vhs[-1]
    bez_okon=vhs[:-1]
    nast=osneo+u'as'        # paired with the -s forms
    prosh=osneo+u'is'       # paired with the -ed forms
    prich=osneo+u'anta'     # paired with the -ing forms

    if okon==u'e':
        formy=[(bez_okon+u'es',nast),
               (bez_okon+u'ed',prosh),
               (bez_okon+u'ing',prich)]
    elif okon==u'y':
        formy=[(bez_okon+u'ies',nast),
               (vhs+u'es',nast),
               (vhs+u's',nast),
               (bez_okon+u'ied',prosh),
               (vhs+u'ed',prosh),
               (bez_okon+u'ing',prich),
               (vhs+u'ing',prich)]
    else:
        udvoit=okon in en_soglasn
        formy=[(vhs+u's',nast),
               (vhs+u'ed',prosh)]
        if udvoit:
            formy.append((vhs+okon+u'ed',prosh))
        formy.append((vhs+u'ing',prich))
        if udvoit:
            formy.append((vhs+okon+u'ing',prich))

    # Registration order matters: it is the order of the translation lists.
    for (en,eo) in formy:
        dobavslov2(slov,u'en',u'eo',en,eo,0,fltolk,oss)

def fleksii_en_eo(slov,vhs,vihs,fltolk,oss):
    """Derive inflected English->Esperanto pairs from the pair vhs -> vihs.

    Nothing is done when either word is shorter than three characters.
    Otherwise, depending on the last letter of the Esperanto word:

    * whenever eo_pluralo() yields a plural: English plural spellings;
    * 'o': the possessive ("'s" -> stem + 'a') and, for single words not
           ending in -tion / -ment, verb forms built on the stem (with a
           trailing "il" removed from it first);
    * 'i': verb forms;
    * 'a': -ly -> stem + 'e', -er -> "Pli ...", -est -> "Plej ...".
    """
    if len(vhs)<3 or len(vihs)<3:
        return

    def dob(en,eo):
        dobavslov2(slov,u'en',u'eo',en,eo,0,fltolk,oss)

    tipeo=vihs[-1]
    osneo=vihs[:-1]

    mnozh=eo_pluralo(vihs)
    if mnozh!=u'':
        if vhs[-1]==u'y':
            dob(vhs[:-1]+u'ies',mnozh)
        dob(vhs+u's',mnozh)
        dob(vhs+u'es',mnozh)

    if tipeo==u'o':
        dob(vhs+u'\'s',osneo+u'a')

        osn_glag=osneo
        if osn_glag.endswith(u'il'):
            osn_glag=osn_glag[:-2]
        odno_slovo=(u' ' not in vhs) and (u' ' not in vihs)
        if (odno_slovo and not vhs.endswith(u'tion')
                       and not vhs.endswith(u'ment')):
            fleksii_en_eo_glag(slov,vhs,osn_glag,fltolk,oss)

    elif tipeo==u'i':
        fleksii_en_eo_glag(slov,vhs,osneo,fltolk,oss)

    elif tipeo==u'a':
        strochn=vihs.lower()
        dob(vhs+u'ly',osneo+u'e')
        dob(vhs+u'er',u'Pli '+strochn)
        dob(vhs+u'est',u'Plej '+strochn)



def dobavslov(slov,vhjaz,vihjaz,vhs,vihs,fiwi,fltolk):
    """Normalise a word pair and add it (plus derived forms) to *slov*.

    Both words are passed through ubr_nk_prob(); the pair is ignored when
    either comes back empty.  The first character of each word is
    upper-cased before the pair is stored with dobavslov2().  For the
    en -> eo direction inflected forms are then added by fleksii_en_eo();
    for those a "replace" mode (fltolk == 1) is downgraded to 0.

    When *fiwi* is true the capitalised source word itself is used as the
    tag (`oss`) of the stored translations.
    """
    # NOTE(review): ubr_nk_prob comes from zbib_tekst; presumably it trims
    # leading/trailing blanks -- confirm there.
    vhs=ubr_nk_prob(vhs)
    if vhs==u'':
        return
    vihs=ubr_nk_prob(vihs)
    if vihs==u'':
        return

    vhs=vhs[:1].upper()+vhs[1:]
    vihs=vihs[:1].upper()+vihs[1:]

    if fiwi:
        oss=vhs
    else:
        oss=u''

    dobavslov2(slov,vhjaz,vihjaz,vhs,vihs,fiwi,fltolk,oss)

    if vhjaz!=u'en' or vihjaz!=u'eo':
        return
    if fltolk==1:
        fleksii_en_eo(slov,vhs,vihs,0,oss)
    else:
        fleksii_en_eo(slov,vhs,vihs,fltolk,oss)

def _razbit_vhs(vhs):
    """Split *vhs* at every ',' or ';' that is outside (...) and [...]."""
    chasti=[]
    glub=0
    nach=0
    for j,c in enumerate(vhs):
        if c==u'(' or c==u'[':
            glub+=1
        elif c==u')' or c==u']':
            glub-=1
        elif glub==0 and (c==u',' or c==u';'):
            chasti.append(vhs[nach:j])
            nach=j+1
    chasti.append(vhs[nach:])
    return chasti


def zagruzslov_revo(slov,vhjaz,vihjaz,fslov,ftdob,fkujo):
    """Load a "source words : translations" dictionary file into *slov*.

    slov   -- dictionary to extend (through dobavslov).
    vhjaz  -- source language code of the file.
    vihjaz -- target language code of the file.
    fslov  -- path of the UTF-8 file; a leading BOM on a line is ignored.
    ftdob  -- when true every pair is added in "only if new" mode (2).
    fkujo  -- when true and the target language is 'eo', translations ending
              in "ujo" are rewritten: to "...io" when the source word starts
              with a capital letter, to "...arbo" when an English source word
              ends in "tree".

    Line format: one or more source words separated by ',' or ';' (separators
    inside brackets do not count), a ':', then translations separated by ','
    or ';'.  A leading '!' selects "replace" mode (1) for the first
    translation of each source word.  Lines without ':' are skipped.

    A bracketed part of a source word is removed when it stands at the start
    or after a blank; when it is glued to the word, both the spelling without
    the bracketed part and the unmodified spelling are registered.
    """
    f=codecs.open(fslov,'rb',encoding='utf-8')
    try:
        stroki=f.readlines()
    finally:
        f.close()

    for s in stroki:
        if s.startswith(u'\ufeff'):
            s=s[1:]
        s=s.replace(u'\r',u'')
        if s.endswith(u'\n'):
            s=s[:-1]

        fltolk=0
        if ftdob:
            fltolk=2
        elif s.startswith(u'!'):
            fltolk=1
            s=s[1:]

        i=iskat (s,u':')
        if i==-1:
            continue
        vihst=s[i+1:].replace(u';',u',').split(u',')

        for vhs in _razbit_vhs(s[:i]):
            p=iskat (vhs,u'(')
            zakrsk=u')'
            if p==-1:
                p=iskat (vhs,u'[')
                zakrsk=u']'
            if p!=-1:
                p1=iskat (vhs,zakrsk)
                if p1!=-1:
                    if p==0 or vhs[p-1]==u' ':
                        vhs=vhs[:p]+vhs[p1+1:]
                    else:
                        vhs1=vhs[:p]+vhs[p1+1:]
                        fltolk1=fltolk
                        for vihs in vihst:
                            dobavslov(slov,vhjaz,vihjaz,vhs1,vihs,0,fltolk1)
                            if fltolk1==1:
                                fltolk1=0

            # The case / suffix tests look at the word itself, not at a blank
            # left over from the separator.
            slovo=vhs.strip()
            fltolk1=fltolk
            for vihs in vihst:
                if (  fkujo and vihjaz==u'eo' and vihs.endswith(u'ujo')  ):
                    if slovo[:1].isupper():
                        vihs=vihs[:len(vihs)-3]+u'io'
                    elif vhjaz==u'en' and slovo.endswith(u'tree'):
                        vihs=vihs[:len(vihs)-3]+u'arbo'

                dobavslov(slov,vhjaz,vihjaz,vhs,vihs,0,fltolk1)
                # "replace" applies to the first translation only; the
                # remaining ones are appended
                if fltolk1==1:
                    fltolk1=0

    return



def zagruzslov_viki(slov,vhjaz,vihjaz,fslov,friwi):
    """Load groups of "lang:title" lines (one group per article) into *slov*.

    slov   -- dictionary to extend (through dobavslov).
    vhjaz  -- source language code; u'' accepts every language.
    vihjaz -- target language code.
    fslov  -- path of the UTF-8 file; a leading BOM on a line is ignored.
    friwi  -- when true the pairs are added with fiwi=2 instead of fiwi=1.

    Groups are separated by blank lines.  Inside a group the line whose
    prefix equals *vihjaz* supplies the translation; every other accepted
    "lang:title" line supplies a source title.  At the end of a group each
    source title is registered with that translation; a group without a
    target-language line contributes nothing.
    """
    f=codecs.open(fslov,'rb',encoding='utf-8')
    try:
        stroki=f.readlines()
    finally:
        f.close()

    fiwi=1
    if friwi:
        fiwi=2

    ishodn=[]       # (language, title) pairs of the current group
    vihs=u''        # target-language title of the current group
    # the extra blank line flushes the last group of the file
    for s in stroki+[u'\n']:
        if s.startswith(u'\ufeff'):
            s=s[1:]
        # NOTE(review): relies on ubr_nk_prob (zbib_tekst) turning a blank
        # line, including its newline, into u'' -- confirm there.
        s=ubr_nk_prob(s)

        if s == u'':
            if vihs!=u'':
                for (jaz,nazv) in ishodn:
                    dobavslov(slov,jaz,vihjaz,nazv,vihs,fiwi,0)
            ishodn=[]
            vihs=u''
            continue

        i=iskat (s,u':')
        if i==-1:
            continue
        if s[:i]==vihjaz:
            vihs=s[i+1:]
        elif vhjaz==u'' or vhjaz==s[:i]:
            ishodn.append((s[:i],s[i+1:]))
    return

def zagruz_ns(ns,fi):
    """Read the UTF-8 file *fi* and store each non-empty line as a key of *ns*.

    Every line is stripped of a leading BOM and of carriage returns and then
    passed through ubr_nk_prob(); lines that end up empty are ignored.  The
    value stored for every key is 1.
    """
    f=codecs.open(fi,'rb',encoding='utf-8')
    try:
        for s in f.readlines():
            if s.startswith(u'\ufeff'):
                s=s[1:]
            s=ubr_nk_prob(s.replace(u'\r',u''))
            if s != u'':
                ns[s]=1
    finally:
        f.close()
    return


def zagruzslov(slov,slovdop,nssvoj,vhjaz,vihjaz,fslov):
    """Load all dictionaries listed in the index file *fslov*.

    slov    -- main dictionary, filled by the 'w', 'wr' and 'r...' entries.
    slovdop -- dict of additional word sets, keyed by the entry type.
    nssvoj  -- word set filled by 's' entries of the target language.
    vhjaz   -- source language code (u'' accepts any source language for
               'r...' entries).
    vihjaz  -- target language code.
    fslov   -- path of the UTF-8 index file.

    Each line is "<type> <arguments>"; everything from '#' on is a comment.
    Blank lines and lines without a type or without arguments are skipped.

      w  <file>              -- loaded with zagruzslov_viki(..., 0)
      wr <file>              -- loaded with zagruzslov_viki(..., 1)
      r[d][u] <src> <dst> <file>
                             -- loaded with zagruzslov_revo when the
                                languages match; 'd' sets ftdob, 'u' sets
                                fkujo
      s  <lang> <file>       -- word list loaded into nssvoj when <lang> is
                                the target language
      <other> <file>         -- word list loaded into slovdop[<other>]
    """
    f=codecs.open(fslov,'rb',encoding='utf-8')
    try:
        stroki=f.readlines()
    finally:
        f.close()

    for s in stroki:
        if s.startswith(u'\ufeff'):
            s=s[1:]
        s=s.replace(u'\r',u'')
        if s.endswith(u'\n'):
            s=s[:-1]
        p=iskat(s,u'#')
        if p!=-1:
            s=s[:p]

        st0=s.split(u' ',1)
        if len(st0)<2 or st0[0]==u'':
            # empty line, comment-only line, or a type without arguments
            continue
        tip=st0[0]
        arg=st0[1]

        if tip==u'w':
            zagruzslov_viki(slov,vhjaz,vihjaz,ubr_nk_prob(arg),0)
        elif tip==u'wr':
            zagruzslov_viki(slov,vhjaz,vihjaz,ubr_nk_prob(arg),1)
        elif tip.startswith(u'r'):
            st=arg.split(u' ',2)
            if len(st)<3:
                continue
            ftdob=0
            fkujo=0
            if u'd' in tip[1:]:
                ftdob=1
            if u'u' in tip[1:]:
                fkujo=1
            if (vhjaz==u'' or st[0]==vhjaz) and st[1]==vihjaz:
                zagruzslov_revo(slov,st[0],vihjaz,ubr_nk_prob(st[2]),
                                      ftdob,fkujo)
        elif tip==u's':
            st=arg.split(u' ',1)
            if len(st)==2 and st[0]==vihjaz:
                zagruz_ns(nssvoj,ubr_nk_prob(st[1]))
        else:
            # Any other type (including 'c' and 'z', whose dedicated handlers
            # were disabled) names an additional word set.
            if tip not in slovdop:
                slovdop[tip]={}
            zagruz_ns(slovdop[tip],ubr_nk_prob(arg))

class Perev_stat:
    """Counters collected by perevod_stat() while translating one text."""

    def __init__(self):
        # flp  -- set to 1 once any dictionary lookup succeeded
        # nup  -- number of words translated unambiguously
        # nnp  -- number of words left untranslated (see snp)
        # nvap -- number of words with several candidate translations
        self.flp = self.nup = self.nnp = self.nvap = 0
        # snp  -- the untranslated words, first letter upper-cased
        # svap -- the dictionary keys that had several translations
        self.snp = []
        self.svap = []

def korr_uktbl(uktbl,us,un,i):
    """Shift pointers of *uktbl* from old text positions to new ones.

    uktbl -- list of positions (modified in place) or None for "no table".
    us    -- a position in the old text.
    un    -- the position in the new text that corresponds to *us*.
    i     -- index of the first table entry not yet converted.

    Starting at index *i*, every entry that is <= us is moved by (un - us);
    an entry that would fall below its predecessor is clamped to it, so the
    table stays non-decreasing.  Returns the index of the first entry that
    was left alone (or *i* unchanged when there is no table).
    """
    if uktbl is None:
        return i
    sdvig=un-us
    kolvo=len(uktbl)
    while i<kolvo and uktbl[i]<=us:
        nov=uktbl[i]+sdvig
        uktbl[i]=nov
        if i>0 and nov<uktbl[i-1]:
            uktbl[i]=uktbl[i-1]
        i+=1
    return i

class Dslov:
    """A word split into parts, as produced inside perevod_korr_eo()."""

    def __init__(self,ti,na,ok,ok1,ok2):
        # ti  -- numeric word-class code
        # na  -- the word without its endings
        # ok  -- the ending proper
        # ok1 -- the 'j' ending, u'' when absent
        # ok2 -- the 'n' ending, u'' when absent
        (self.ti,self.na,self.ok,self.ok1,self.ok2) = (ti,na,ok,ok1,ok2)

def perevod_korr_eo(s,uktbl):
    """Adjust word endings in the Esperanto text *s* and return the result.

    s     -- text produced by the word-for-word translation.
    uktbl -- list of positions in *s* (or None); it is updated in place via
             korr_uktbl() so that the positions refer to the returned text.

    The text is split into words and the gaps between them.  Each word is
    split into stem and endings and given a class code `ti`:

        0    not classified
        100  ends in 'o'             110  si / li / ĝi / ŝi
        130  ili / oni               200  ends in 'a'
        210  iu / kiu / tiu / ĉiu / neniu
        300  ends in 'e'             400  ends in 'i', 'u' or vowel + 's'
        +20  when a 'j' ending was split off

    Two corrections are then applied:

    * a word in "-is" at the start of the text, or after another class-400
      word (possibly with one class-300 word in between), is turned into
      the "-ita" form;
    * going leftwards from every class-1xx word, the directly preceding
      class-1xx/2xx words (separated only by a single blank or newline and
      carrying no 'n' ending) are made to agree with it: class-1xx words
      get the ending 'a', and the 'j' ending is added or removed to match.
    """
    # NOTE(review): RBR / RBRm come from zbib_tekst; they are used here as
    # "equals" / "equals one of the list" tests, presumably ignoring case.

    # 1. Split into words (slov), their start positions (spslov) and the
    #    gaps around them (prom; always one more gap than words).
    prom=[]
    slov=[]
    spslov=[]
    p=0
    while 1:
        p0=p
        while p<len(s) and not s[p].isalpha():
            p+=1
        prom.append(s[p0:p])
        if p>=len(s):
            break
        p1=p
        # a hyphen belongs to the word when a letter follows it
        while p<len(s) and (s[p].isalpha() or
                             (p<len(s)-1 and s[p]==u'-' and s[p+1].isalpha())):
            p+=1
        slov.append(s[p1:p])
        spslov.append(p1)

    # 2. Classify every word and split off its endings.
    dslov=[]
    for sl in slov:
        ti=0
        ok2=u''
        ok1=u''
        if RBRm(sl,[u'la',u'kaj',u'tra',u'pro',u'do',u'na']):
            # function words are never decomposed
            na=sl
            ok=u''
        else:
            na=sl[:len(sl)-1]
            ok=sl[len(sl)-1]
            if len(na)>=2 and RBR(ok,u'n'):
                ok2=ok
                ok=u''
                sl=na

            if RBRm(sl,[u'si',u'li',u'ĝi',u'ŝi']):
                ti=110
                na=sl
                ok=u''
            elif RBRm(sl,[u'ili',u'oni']):
                ti=130
                na=sl
                ok=u''
            else:
                na=sl[:len(sl)-1]
                ok=sl[len(sl)-1]
                if len(na)>=2 and RBR(ok,u'j'):
                    ok1=ok
                    ok=u''
                    sl=na

                na=sl[:len(sl)-1]
                ok=sl[len(sl)-1]
                if RBRm(sl,[u'iu',u'kiu',u'tiu',u'ĉiu',u'neniu']):
                    ti=210
                else:
                    if RBR(ok,u'o'):
                        ti=100
                    elif RBR(ok,u'a'):
                        ti=200
                    elif ok1==u'' and RBR(ok,u'e'):
                        ti=300
                    elif ok1==u'' and ok2==u'' and RBRm(ok,[u'i',u'u']):
                        ti=400
                    elif (ok1==u'' and ok2==u'' and RBR(ok,u's') and
                                   len(sl)>=4 and
                                   RBRm(sl[len(sl)-2],[u'i',u'a',u'o',u'u'])):
                        # two-letter ending: vowel + 's'
                        na=sl[:len(sl)-2]
                        ok=sl[len(sl)-2:]
                        ti=400
                    else:
                        ti=0
            if RBR(ok1,u'j'):
                ti+=20
        dslov.append(Dslov(ti,na,ok,ok1,ok2))
        vivod(u'  %s  ti=%d\n'%(na+ok+ok1+ok2,ti))

    # 3. "-is" -> "-ita" in the positions described in the docstring.
    i=len(dslov)-1
    while i>=0:
        if dslov[i].ti==400 and dslov[i].ok==u'is' and (i<1 or
                dslov[i-1].ti==400 or
                (dslov[i-1].ti==300 and (i<2 or dslov[i-2].ti==400)) ):
            dslov[i].ti=200
            dslov[i].na+=u'it'
            dslov[i].ok=u'a'
        i-=1

    # 4. Agreement, scanning right to left.  After a class-1xx head word the
    #    scan resumes at the word where its leftward walk stopped.
    i=len(dslov)-1
    while i>=0:
        ti0=dslov[i].ti
        j=i-1
        if ti0>=100 and ti0<200:
            while j>=0:
                if not (dslov[j].ti>=100 and dslov[j].ti<300):
                    break
                if dslov[j].ok2!=u'':
                    break
                if prom[j+1]!=u' ' and prom[j+1]!=u'\n':
                    break
                vivod(u'   j=%d  ti=%d\n'%(j,dslov[j].ti))
                if dslov[j].ti>=100 and dslov[j].ti<200:
                    dslov[j].ti+=100
                    dslov[j].ok=u'a'
                    if dslov[j].ti==210 or dslov[j].ti==230:
                        # A converted pronoun is an ordinary class-200 word
                        # from here on, so the number agreement below
                        # applies to it (this was a no-op comparison).
                        dslov[j].ti=200
                if ti0>=120 and dslov[j].ti>=200 and dslov[j].ti<220:
                    dslov[j].ti+=20
                    dslov[j].ok1=u'j'
                elif ti0<120 and dslov[j].ti>=220 and dslov[j].ti<240:
                    dslov[j].ti-=20
                    dslov[j].ok1=u''
                j-=1
        i=j

    # 5. Reassemble the text, moving the pointers of uktbl along.
    rez=u''
    iuktbl=0
    i=0
    while i<len(dslov):
        rez+=prom[i]
        iuktbl=korr_uktbl(uktbl,spslov[i],len(rez),iuktbl)
        ds=dslov[i]
        rez+=ds.na+ds.ok+ds.ok1+ds.ok2
        i+=1

    rez+=prom[i]
    korr_uktbl(uktbl,len(s),len(rez),iuktbl)

    return rez

# Markup prefixes (lower case) that perevod_stat() copies to the output
# instead of translating.
neperevt=[u'<br',u'<tr>',u'</tr>',u'<td>',u'</td>',u'<font',u'</font>',
        u'<span',u'</span>',u'<b>',u'</b>',u'<i>',u'</i>',u'<s>',u'</s>',
        u'<sub',u'</sub>',u'<sup',u'</sup>',u'<gallery>',u'</gallery>',
        u'<center>',u'</center>',u'<blockquote>',u'</blockquote>',
        u'<div',u'</div>',u'<small>',u'</small>',u'<tt>',u'</tt>',
        u'<ref>',u'</ref>',u'<references',u'</references>',]

# Attribute / URL prefixes whose value perevod_stat() copies untranslated.
# The "name= " variants (with a blank) are listed first so that they are
# tried before the plain "name=" variants.
neperevat=[u'width= ',u'colspan= ',u'border= ',u'style= ',u'align= ',u'id= ',
        u'bgcolor= ',u'color= ',u'clear= ',u'valign= ',u'cellpadding= ',
        u'cellspacing= ',u'class= ',u'<ref name= ',
        u'http://',u'https://',
        u'width=',u'colspan=',u'border=',u'style=',u'align=',u'id=',
        u'bgcolor=',u'color=',u'clear=',u'valign=',u'cellpadding=',
        u'cellspacing=',u'class=',u'<ref name=',]

def perevod_stat(slov,vhjaz,vihjaz,s,uktbl=None,sstbl=None):
    """Translate *s* by longest-match dictionary lookup; return (text, stats).

    slov   -- dictionary: (language, phrase with upper-case first letter)
              -> entry with the attributes `tp` (translations) and `toss`.
    vhjaz  -- source language code used for the lookups.
    vihjaz -- target language code; for u'eo' the result is post-processed
              by perevod_korr_eo().
    s      -- text to translate.
    uktbl  -- optional list of positions in *s*; updated in place (through
              korr_uktbl) to the matching positions in the result.  When it
              is absent or empty, runs of blanks in *s* are collapsed first.
    sstbl  -- optional list of (tag, index into uktbl) pairs; for a phrase
              that spans exactly uktbl[index]..uktbl[index+1] the tag is
              used to narrow down several candidate translations.

    Returns a tuple (translated text, Perev_stat).  In the text a phrase
    with several translations appears as "(a, b, ...)", and an unknown word
    with at least three letters is wrapped in underscores.
    """
    pes=Perev_stat()
    maxdl=48        # longest phrase (in characters) tried as one key
    if not uktbl:
        while u'  ' in s:
            s = s.replace(u'  ',u' ')

    if len(s)<1:
        return (u'',pes)
    rez=u''
    iuktbl=0
    i=0
    while i<len(s):
        # blanks are copied through unchanged
        while i<len(s) and s[i]==u' ':
            rez+=s[i]
            i+=1
        if i>=len(s):
            break

        # Copy everything that must not be translated: character entities,
        # the markup listed in neperevt and attribute values / URLs listed
        # in neperevat.  Repeat until none of them matches at position i.
        fl=1
        while fl:
            fl=0
            if i+3<len(s) and s[i]==u'&':
                j=iskat(s[i:i+10],u';')
                if j!=-1 and ( (s[i+1]==u'#' and s[i+2:i+j].isdigit()) or
                                                 s[i+1:i+j].isalpha() ):
                    np=s[i:i+j+1]
                    rez+=np
                    i+=len(np)
                    fl=1
                    # re-run the checks at the new position; leaving the
                    # loop here would let markup that directly follows the
                    # entity reach the dictionary lookup
                    continue
            for np in neperevt:
                if s[i:i+len(np)].lower()==np:
                    rez+=np
                    i+=len(np)
                    fl=1
                    break
            for np in neperevat:
                if s[i:i+len(np)].lower()==np:
                    dl=len(np)
                    if i+dl<len(s) and s[i+dl]==u'"':
                        # quoted value: copy up to the closing quote
                        j=iskat(s[i+len(np)+1:],u'"')
                        if j!=-1:
                            dl+=2+j
                    else:
                        # bare value: copy up to the next blank, newline or '|'
                        (j,jj)=iskats_mn(s[i+dl:],0,[u' ',u'\n',u'|'])
                        if j!=-1:
                            dl+=j
                    iuktbl=korr_uktbl(uktbl,i,len(rez),iuktbl)
                    rez+=s[i:i+dl]
                    i+=dl
                    fl=1
                    break
        if i>=len(s):
            break

        # Longest match: try ever shorter prefixes of s[i:] as a key.
        dl=maxdl
        if i+dl>len(s):
            dl=len(s)-i
        while dl>0:
            # a candidate must not end in the middle of a word
            if dl>0 and i+dl<len(s) and s[i+dl].isalnum():
                dl-=1
                continue
            tu=s[i:i+dl]
            tu0=tu[0].upper()
            flmb=(tu0!=tu[0])   # source started lower-case: lower-case the result
            tu=tu0+tu[1:]
            if (vhjaz,tu) in slov:
                pes.flp=1
                tp=slov[(vhjaz,tu)].tp
                npe=len(tp)
                if npe==1:
                    pe=tp[0]
                    if pe==u'_':
                        # The translation '_' marks a word to be dropped.  It
                        # is dropped (with the blank after it) when it is
                        # lower-case or stands at the start of the text /
                        # within two characters after '.' or a newline;
                        # otherwise the source word is kept as it is.
                        if i+dl<len(s) and s[i+dl]==u' ' and (s[i].islower() or
                                   i<2 or (s[i-1] in [u'.',u'\n']) or
                                          (s[i-2] in [u'.',u'\n'])):
                            pe=u''
                            # the capital of a dropped word moves to the next one
                            if (i+dl+1<len(s) and s[i].isupper() and
                                                  s[i+dl+1].islower()):
                                s=s[:i+dl+1]+s[i+dl+1].upper()+s[i+dl+2:]
                            dl+=1
                        else:
                            pe=s[i:i+dl]
                    elif flmb:
                        pe=pe[0].lower()+pe[1:]
                    pes.nup+=1
                else:
                    if sstbl and uktbl:
                        # keep only the translations recorded under the tag
                        # attached to exactly this span, if there are any
                        for (oss,iuk) in sstbl:
                            if i==uktbl[iuk] and i+dl==uktbl[iuk+1]:
                                oss=perv_upper(oss)
                                toss=slov[(vhjaz,tu)].toss
                                tpw=[]
                                for (ossw,pew) in toss:
                                    if (ossw==oss) and (pew in tp):
                                        tpw.append(pew)
                                wikipedia.output(u'oss="%s" tp="%s" tpw="%s"'%(oss,spisvstr(tp,u','),spisvstr(tpw,u',')))
                                if ( len(tpw) >= 1 ):
                                    tp=tpw
                                break

                    npe=len(tp)
                    if npe==1:
                        pe=tp[0]
                        if flmb:
                            pe=pe[0].lower()+pe[1:]
                        pes.nup+=1
                    else:
                        # still ambiguous: show all candidates
                        pe=u'('
                        k=0
                        while k<npe:
                            pe1=tp[k]
                            if flmb:
                                pe1=pe1[0].lower()+pe1[1:]
                            pe+=pe1
                            k+=1
                            if k<npe:
                                pe+=u', '
                        pe+=u')'
                        pes.nvap+=1
                        pes.svap.append(tu)

                iuktbl=korr_uktbl(uktbl,i,len(rez),iuktbl)
                rez+=pe
                break
            dl-=1
        if dl>0:
            i+=dl
        else:
            # Nothing matched: copy one token (a run of alphanumerics or a
            # single other character).
            j=1
            while i+j<len(s) and s[i+j-1].isalnum() and s[i+j].isalnum():
                j+=1
            iuktbl=korr_uktbl(uktbl,i,len(rez),iuktbl)
            if s[i].isalnum() and not s[i:i+j].isdigit():
                j1=0
                nbkv=0
                while j1<j:
                    if s[i+j1].isalpha():
                        nbkv+=1
                    j1+=1
                if nbkv>=3:
                    # an unknown word: mark it and remember it
                    rez+=u'_'+s[i:i+j]+u'_'
                    pes.nnp+=1
                    pes.snp.append(perv_upper(s[i:i+j]))
                else:
                    rez+=s[i:i+j]
            else:
                rez+=s[i:i+j]
            i+=j
    korr_uktbl(uktbl,len(s),len(rez),iuktbl)

    if vihjaz==u'eo':
        rez=perevod_korr_eo(rez,uktbl)

    return (rez,pes)

def perevod(slov,vhjaz,vihjaz,s,uktbl=None):
    """Translate *s*; return (text, flag) where flag is the `flp` statistic.

    Thin wrapper around perevod_stat() for callers that only need to know
    whether any dictionary lookup succeeded.
    """
    rezultat=perevod_stat(slov,vhjaz,vihjaz,s,uktbl)
    return (rezultat[0],rezultat[1].flp)

def perevod_iwi_spis(slov,vhjaz,vihjaz,s):
    """Return the `iwi` translation list recorded for the title *s*.

    Underscores in *s* are treated as blanks, runs of blanks are collapsed,
    the result is passed through ubr_nk_prob() and its first character is
    upper-cased before the lookup under (vhjaz, title).  An empty list is
    returned for an empty or unknown title.  *vihjaz* is not used.

    The list returned for a known title is the entry's own list, not a copy.
    """
    s = s.replace(u'_',u' ')
    while u'  ' in s:
        s = s.replace(u'  ',u' ')
    s=ubr_nk_prob(s)

    if len(s)<1:
        return []

    kl=(vhjaz,s[0].upper()+s[1:])
    if kl not in slov:
        return []
    return slov[kl].iwi

def perevod_iwi(slov,vhjaz,vihjaz,s):
    """Return the translations of title *s* as wiki links.

    Every entry found by perevod_iwi_spis() is wrapped in "[[...]]"; several
    entries are separated by ", ".  An unknown title gives u''.  The case of
    the translations is left as stored.
    """
    iwi=perevod_iwi_spis(slov,vhjaz,vihjaz,s)
    if len(iwi)<1:
        return u''
    ssylki=[u'[['+pe+u']]' for pe in iwi]
    return u', '.join(ssylki)

def perev_uch_nezsl(osnp,osvap,stat):
    """Add the words collected in *stat* to two running frequency tables.

    osnp  -- dict word -> count, increased for every word in stat.snp.
    osvap -- dict word -> count, increased for every word in stat.svap.
    stat  -- object with the list attributes `snp` and `svap` (as filled in
             by perevod_stat).

    Both dicts are modified in place; a word occurring several times in a
    list is counted that many times.
    """
    for t in stat.snp:
        osnp[t]=osnp.get(t,0)+1

    for t in stat.svap:
        osvap[t]=osvap.get(t,0)+1

def perev_uch_nezsl_rt(osnp,osvap,stat):
    """Mark the words collected in *stat* as seen (value 1) in two tables.

    Unlike perev_uch_nezsl() this does not count: every word of stat.snp is
    set to 1 in *osnp* and every word of stat.svap to 1 in *osvap*,
    overwriting whatever value was stored before.
    """
    osnp.update(dict.fromkeys(stat.snp,1))
    osvap.update(dict.fromkeys(stat.svap,1))

def _verh_spiska(schet,kolotch):
    """Return the (count, word) pairs of *schet*, largest first, cut to the limit."""
    pary=sorted([(n,t) for (t,n) in schet.items()],reverse=True)
    # at least one entry is always reported, even for a limit below 1
    return pary[:max(kolotch,1)]


def perev_pech_nezsl(slov,vhjaz,vihjaz,osnp,osvap,fnesl,kolotch):
    """Write the report about untranslated and ambiguous words to *fnesl*.

    slov    -- dictionary; used to look up the candidate translations.
    vhjaz   -- source language code of the dictionary keys.
    vihjaz  -- target language code; not used here.
    osnp    -- dict word -> count of untranslated words.
    osvap   -- dict word -> count of ambiguously translated words.
    fnesl   -- writable text stream (needs write() and flush()).
    kolotch -- integer limit: at most this many entries per section.

    Four sections are written, each but the last followed by an empty line:
      1. "count  word" for the most frequent untranslated words;
      2. "count  word" for the most frequent ambiguous words;
      3. the words of section 1 alone, one per line;
      4. "! word : a , b" for those words of section 2 that have more than
         one translation in the dictionary.
    Entries are ordered by descending count, ties by descending word.
    """
    ssnp=_verh_spiska(osnp,kolotch)
    ssvap=_verh_spiska(osvap,kolotch)

    for (n,t) in ssnp:
        fnesl.write(u'%d  %s\n'%(n,t))
    fnesl.write(u'\n')
    fnesl.flush()

    for (n,t) in ssvap:
        fnesl.write(u'%d  %s\n'%(n,t))
    fnesl.write(u'\n')
    fnesl.flush()

    for (n,t) in ssnp:
        fnesl.write(u'%s\n'%t)
    fnesl.write(u'\n')
    fnesl.flush()

    for (n,t) in ssvap:
        kl=(vhjaz,t)
        if kl in slov:
            tp=slov[kl].tp
            if len(tp)>1:
                fnesl.write(u'! %s : %s\n'%(t,spisvstr(tp,u' , ')))
    fnesl.flush()


def main(slov,vhjaz,vihjaz,fvh,fvih):
    """Translate the file *fvh* line by line into *fvih* and write the report.

    slov   -- loaded dictionary.
    vhjaz  -- source language code.
    vihjaz -- target language code.
    fvh    -- path of the UTF-8 input file.
    fvih   -- path of the UTF-8 output file (overwritten).

    Underscores in the input are treated as blanks.  After the translation
    the up to 500 most frequent untranslated / ambiguous words are reported
    through perev_pech_nezsl().

    NOTE: the report goes to the module-level stream `otch`, which is only
    created when this file is run as a script.
    """
    kolotch=500
    osnp={}
    osvap={}
    f0=codecs.open(fvh,'rb',encoding='utf-8')
    try:
        f1=codecs.open(fvih, 'w', 'utf-8')
        try:
            for s in f0.readlines():
                if ord(s[0]) == 65279:
                    # drop a byte-order mark
                    s=s[1:]
                s=s.replace(u'\r',u'').replace(u'_',u' ')

                (rez,stat)=perevod_stat(slov,vhjaz,vihjaz,s)

                f1.write(rez)
                f1.flush()

                perev_uch_nezsl(osnp,osvap,stat)
        finally:
            f1.close()
    finally:
        f0.close()

    perev_pech_nezsl(slov,vhjaz,vihjaz,osnp,osvap,otch,kolotch)


if __name__ == "__main__":
    # Arguments: source language, target language, dictionary index file,
    # input file, output file, report file.
    otch = None
    try:
        vhjaz = sys.argv[1]
        vihjaz = sys.argv[2]
        fslov = sys.argv[3]
        fvh = sys.argv[4]
        fvih = sys.argv[5]
        fotch = sys.argv[6]

        # `otch` is read as a module-level name by main()
        otch = codecs.open(fotch, 'w', 'utf-8')
        slov={}
        nssvoj={}
        slovdop={}
        zagruzslov(slov,slovdop,nssvoj,vhjaz,vihjaz,fslov)
        main(slov,vhjaz,vihjaz,fvh,fvih)
    finally:
        try:
            # make sure the report reaches the disk even after an error
            if otch is not None:
                otch.close()
        finally:
            wikipedia.stopme()


 

Static Wikipedia (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2007 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2006 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu

Static Wikipedia February 2008 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu