
User:Maksim-e/zzamen.py

From Wikipedia, the free encyclopedia

# -*- coding: UTF-8 -*-

__version__='$Id:'
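
# Bot script for the old pywikipedia framework: performs text replacements on
# a batch of wiki pages according to a task file given on the command line.
# Helper functions used below (ubr_nk_prob, iskats_mn, iskkonpodp, ...) come
# from the companion module zbib_tekst.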

import wikipedia
import re, sys
import codecs
from zbib_tekst import *

def vivod(b):
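    # Print a message and also append it to the report file (otch).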
    wikipedia.output(b)
    otch.write(b)
    otch.flush()

def sravn_opc(t,i,s,opc):
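    # Compare t[i:i+len(s)] with s character by character; return 1 on match,
    # 0 otherwise.  With the 'k' option the character following '[' or ':' in s
    # is compared case-insensitively, as is the first character when the 'l'
    # option is also set.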
    ls=len(s)
    if i+ls>len(t):
        return 0
    k=0
    while k<ls:
        a=t[i+k]
        b=s[k]
        if (u'k' in opc) and ( (k>0 and (s[k-1] in u'[:')) or 
                               (k==0 and (u'l' in opc)) ):
            a=a.upper()
            b=b.upper()
        if a!=b:
            return 0
        k+=1
    return 1

def iskat_opc(t,i,s0,opc):
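    # Search t from position i for s0 and return the (start, end) span of the
    # match, or (-1, -1) if none is found.  With the 's' option a link
    # '[[target]]' also matches its piped form '[[target|...]]'; with the 'l'
    # option the normalized target of a '[[...]]' or '{{...}}' is compared
    # against s0 as a whole.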

    ss=[s0]
    if (u's' in opc) and len(s0)>2 and s0[len(s0)-2:]==u']]':
        ss.append(s0[:len(s0)-2]+u'|')

    lt=len(t)
    while i<lt:
        if (u'l' in opc) and (t[i:i+2]==u'[[' or t[i:i+2]==u'{{'):
            pn=i+2
            (pk,ik)=iskats_mn(t,pn,[u'[[',u']]',u'{{',u'}}',u'|',u'#'])
            if pk==-1:
                pk=lt
            while pn<pk and (t[pn] in [u' ',u'\n',u'\t',u'\r']):
                pn+=1
            if pn<pk and t[pn]==u':' and s0[0]!=u':':
                pn+=1
            while pn<pk and (t[pk-1] in [u' ',u'\n',u'\t',u'\r']):
                pk-=1

            ssilk=t[pn:pk].replace(u'_',u' ').replace(u'  ',u' ')
#            ssilk=perv_upper(ubr_nk_prob(ssilk))
#            vivod(u'%d %d %d %d %s %s\n'%(pn,pk,len(ssilk),len(s0),ssilk,s0))
            if len(ssilk)==len(s0) and sravn_opc(ssilk,0,s0,opc):
                return (pn,pk)
            i=pk
        for j in range(len(ss)):
            s=ss[j]
            ls=len(s)
            ravn=sravn_opc(t,i,s,opc)
            if ravn:
                if j==1:
                    m=i+ls
                    m1=iskkonpodp(t,m,u'[[',u']]')
                    if m1!=-1:
                        return (i,m1+2)
                return (i,i+ls)
        i+=1
    return (-1,-1)


def zam(n,tzfl,tzst,tzno,kol,komm):
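    # Load page n and apply the kol replacements tzst[j] -> tzno[j], saving the
    # result with edit summary komm.  Per-replacement flags (tzfl[j]):
    #   'K' - skip if the page already has category links,
    #   'u' - skip if the new text is already present,
    #   'n' - with an empty old text, prepend instead of appending,
    #   'w' - collect interwiki links after the replacements, not before,
    #   'k', 'l', 's' - matching options, see sravn_opc() / iskat_opc().
    # Interwiki links are stripped before editing and re-appended at the end
    # (main and category namespaces only); nothing is saved when flzap is 0.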


    pl = wikipedia.Page(mysite,wikipedia.UnicodeToAsciiHtml(n))
    wikipedia.output(pl.title())
#    pl._contents=u'qwe [[qwe]] [[qwer|qwe]] [[fqwe]] qwe :qwe {{wqwe}} [[qwe|qwer]] [qwe] sqwr dwer [[:qwe]] qwet dqwe gqwej [[enn:qwe]] [[qwey]]\n'
#    pl._contents+=u'Qwe [[Qwe]] [[Qwer|Qwe]] [[fQwe]] Qwe :Qwe {{wQwe}} [[Qwe|Qwer]] [Qwe] sQwr dwer [[:Qwe]] Qwet dQwe gQwej [[enn:Qwe]] [[Qwey]]'


    try:
        wikipedia.getall(mysite, [pl])
        nt = pl.get()
    except wikipedia.NoPage:
        b = u'- %s\n\n' % pl.title()
        vivod(b)
        return
    except wikipedia.IsRedirectPage:
        b = u'+ %s\n-> %s\n\n' % (pl.title(),pl.getRedirectTarget())
        vivod(b)
        return

    otch.write(u'+ %s\n' % pl.title())
    otch.flush()

    otch2.write(u'=========================\n%s\n-=======\n%s\n' % (pl.title(),nt))
    otch2.flush()

    sk=wikipedia.getCategoryLinks(nt,mysite)
    pechk=1

    j=0
    fliwi=1
    while j<kol:
        if (u'w' in tzfl[j]):
            fliwi=0
        j+=1

    obriwi=1
    katprefi=mysite.family.category_namespace(mysite.lang)
    if (u':' in pl.title()) and (not pl.title().startswith(katprefi+u':')):
        obriwi=0


    if obriwi and fliwi:
        oiwi = pl.interwiki()
        niwi = {}
        for pl2 in oiwi:
            if pl.site()!=pl2.site():
                niwi[pl2.site()] = pl2
        nt = wikipedia.replaceLanguageLinks(nt, {})

    nt = nt.replace(u'\r',u'')
#    while u' \n' in nt:
#        nt = nt.replace(u' \n',u'\n')

    fzam=0

    j=0
    while j<kol:
        zst=tzst[j]
        zst=zst[:len(zst)-1]
        zno=tzno[j]
        zno=zno[:len(zno)-1]
#        if mysite.lang==u'eo':
#            zst=zst.replace(u'Ĉ',u'Cx').replace(u'ĉ',u'cx')
#            zst=zst.replace(u'Ĝ',u'Gx').replace(u'ĝ',u'gx')
#            zst=zst.replace(u'Ĵ',u'Jx').replace(u'ĵ',u'jx')
#            zst=zst.replace(u'Ĥ',u'Hx').replace(u'ĥ',u'hx')
#            zst=zst.replace(u'Ŝ',u'Sx').replace(u'ŝ',u'sx')
#            zst=zst.replace(u'Ŭ',u'Ux').replace(u'ŭ',u'ux')
#            zno=zno.replace(u'Ĉ',u'Cx').replace(u'ĉ',u'cx')
#            zno=zno.replace(u'Ĝ',u'Gx').replace(u'ĝ',u'gx')
#            zno=zno.replace(u'Ĵ',u'Jx').replace(u'ĵ',u'jx')
#            zno=zno.replace(u'Ĥ',u'Hx').replace(u'ĥ',u'hx')
#            zno=zno.replace(u'Ŝ',u'Sx').replace(u'ŝ',u'sx')
#            zno=zno.replace(u'Ŭ',u'Ux').replace(u'ŭ',u'ux')
#        if (u'u' in tzfl[j]) and len(zno)>0 and (zno.lower() in nt.lower()):

        if (u'K' in tzfl[j]) and (len(sk) > 0):
            vivod(u'uzxe kat\n')
            if pechk:
                for kat in sk:
                    vivod(u'  %s\n'%kat.title())
                pechk=0
            vivod(u'\n')
            j+=1
            continue

        if (u'u' in tzfl[j]) and len(zno)>0 :
            (p,pk)=iskat_opc(nt,0,ubr_nk_prob(zno),tzfl[j])
            if p!=-1:
                b=u'uzxe\n'
                wikipedia.output(b)
                otch.write(b)
                otch.flush()
                j+=1
                continue

#        nt = nt.replace(zst,zno)
        nz=0
        p0=0
        while (len(zno)>0 or len(zst)>0) and p0<len(nt):
            if zst==u'':
                if (u'n' in tzfl[j]):
                    p=0
                else:
                    p=len(nt)
                pk=p
            else:
#                p=iskats(nt,p0,zst)
                (p,pk)=iskat_opc(nt,p0,zst,tzfl[j])
            if p==-1:
                break
            nt=nt[:p]+zno+nt[pk:]
            p0=p+len(zno)
            nz+=1
            fzam=1
            if zst==u'':
                break

        b=u'%d\n' % nz
        wikipedia.output(b)
        otch.write(b)
        otch.flush()
        j+=1

    if obriwi and (not fliwi):
        pl1=wikipedia.Page(pl.site(),pl.title())
        pl1._contents=nt
        oiwi = pl1.interwiki()
        niwi = {}
        for pl2 in oiwi:
            if pl.site()!=pl2.site():
                niwi[pl2.site()] = pl2
        nt = wikipedia.replaceLanguageLinks(nt, {})

    if fzam:
        while nt[len(nt)-1:]==u'\n':
            nt=nt[:len(nt)-1]
        if obriwi:
            nt+=wikipedia.replaceLanguageLinks(u'', niwi)

        otch2.write(u'+=======\n%s\n' % nt)
        otch2.flush()
        if flzap:
            pl.put(nt, comment = komm, minorEdit = False)
        else:
            vivod(u'!!! ne zapis\n')
    otch.write(u'\n')
    otch.flush()
    return


def main():
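    # Parse the task file and call zam() for every page of every block.
    # Blocks are separated by a '============' line and consist of page titles,
    # one per line ('|summary' sets the edit summary, '#' lines are comments),
    # followed by one or more replacements of the form
    # '?=======' / flags line ('0' as first character disables it) /
    # '-=======' / old text / '+=======' / new text.
    # A malformed block is reported ('!er') and skipped.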
    zfl=0
    tzfl={}
    tzst={}
    tzno={}
    komm=u''
    n=[]
    f=codecs.open(filename,'rb',encoding='utf-8')

    i=0
    j=0
    for s in f.readlines():
#        wikipedia.output(u'%d\n' % ord(s[0]))
        if ord(s[0]) == 65279:
            s=s[1:]
#        wikipedia.output(u'%d %d  %s   %s\n' % (i,j,n,s))
        s=s.replace(u'\r',u'')
        if s[len(s)-1]!=u'\n':
            s=s+u'\n'

        if s[0:12] == u'============':
#            wikipedia.output(u'k%d %d  %s   %s\n' % (i,j,n,s))
            if i==100:
                b=u'!er %s\n' % n
                wikipedia.output(b)
                otch.write(b)
                otch.flush()
            if (i!=100) and (j>0):
#                wikipedia.output(u'kl%d %d  %s   %s\n' % (i,j,n,s))
                for n1 in n:
                    zam(n1,tzfl,tzst,tzno,j,komm)
            n=[]
            komm=u''
            i=0
            j=0
        elif i==0:
            if s[0:8]==u'?=======':
                i=2
            elif s[0]==u'|':
                komm=s[1:]
            elif s[0]!=u'#':
                n.append(s)
        elif i==2:
            zfl= (s[0]!=u'0')
            if zfl:
                tzfl[j]=s
                tzst[j]=u''
                tzno[j]=u''
                j+=1
            i=3
        elif i==3:
            if s[0:8]==u'-=======':
                i=4
            else:
                i=100
                wikipedia.output(u"er i==3\n")
        elif i==4:
            if s[0:8]==u'+=======':
                i=5
            else:
                if zfl:
                    tzst[j-1]+=s
        else: #if i==5:
            if s[0:8]==u'?=======':
                i=2
            else:
                if zfl:
                    tzno[j-1]+=s



#        wikipedia.output(u"No title found - skipping a page.")


#    text=''.join(text)

wikipedia.setAction('')
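# Command-line usage: zzamen.py <task file>[.txt] [n]
# Reports are written to <task file>_ot.txt and <task file>_op.txt; the
# optional second argument 'n' disables saving (dry run).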

fi = sys.argv[1]
if fi.endswith(u'.txt'):
    fi=fi[:len(fi)-4]
filename = fi+'.txt'
fotch = fi+'_ot.txt'
fotch2 = fi+'_op.txt'

flzap=1
if len(sys.argv)>=3 and sys.argv[2]==u'n':
    flzap=0

otch = codecs.open(fotch, 'w', 'utf-8')
otch2 = codecs.open(fotch2, 'w', 'utf-8')
mysite = wikipedia.getSite()
try:
    main()
except:
    wikipedia.stopme()
    raise
else:
    wikipedia.stopme()



 
