New Immissions/Updates:
boundless - educate - edutalab - empatico - es-ebooks - es16 - fr16 - fsfiles - hesperian - solidaria - wikipediaforschools
- wikipediaforschoolses - wikipediaforschoolsfr - wikipediaforschoolspt - worldmap -

See also: Liber Liber - Libro Parlato - Liber Musica  - Manuzio -  Liber Liber ISO Files - Alphabetical Order - Multivolume ZIP Complete Archive - PDF Files - OGG Music Files -

PROJECT GUTENBERG HTML: Volume I - Volume II - Volume III - Volume IV - Volume V - Volume VI - Volume VII - Volume VIII - Volume IX

Ascolta "Volevo solo fare un audiolibro" su Spreaker.
CLASSICISTRANIERI HOME PAGE - YOUTUBE CHANNEL
Privacy Policy Cookie Policy Terms and Conditions
Участник:Maksim-e/zdob apa ali.py — Википедия

Участник:Maksim-e/zdob apa ali.py

Материал из Википедии — свободной энциклопедии

# -*- coding: UTF-8 -*-

__version__='$Id:'

import wikipedia, pagegenerators
import re, sys, os
import codecs
from zbib_tekst import *
from zget_buf import *

# Title suffix recognised on entries of the input list: ivsenmain() strips it
# from an entry to obtain the base title, and rabota() refuses to create a
# redirect whose target is just "<page title>" + this suffix.
# NOTE(review): "apartigilo" is presumably the Esperanto word for a
# disambiguation page - confirm.
skbtekst=u' (apartigilo)'

def vivod(b):
    """Echo the text ``b`` to the console and to the main report file.

    The text goes through ``wikipedia.output`` first and is then written to
    the module-global report file ``otch``, which is flushed immediately.
    """
    wikipedia.output(b)
    zhurnal = otch
    zhurnal.write(b)
    zhurnal.flush()

def ubrkoment (text):
    """Return ``text`` with nowiki sections and HTML comments removed.

    Every ``<nowiki>...</nowiki>`` section (tag names matched
    case-insensitively, contents may span lines) and every ``<!-- ... -->``
    comment is cut out together with its contents.  Any ``-->`` that is
    still present afterwards is rewritten to ``-- >``.

    An opening ``<nowiki>`` without a closing tag is left untouched.
    """
    # The closing </nowiki> has to be part of the pattern: without it the
    # lazy ".*?" matches nothing and only the opening tag would be removed,
    # leaving the text that was supposed to be ignored.
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
    match = nowikiOrHtmlCommentR.search(text)
    while match:
        text = text[:match.start()] + text[match.end():]
        # Search again from the beginning: joining the two halves can form a
        # new section that did not exist before the cut.
        match = nowikiOrHtmlCommentR.search(text)
    return text.replace(u'-->',u'-- >')

def prov_v_koment (text,pm):
    """Tell whether any position in ``pm`` lies inside disabled wikitext.

    "Disabled" means a ``<nowiki>...</nowiki>`` section (tag names matched
    case-insensitively) or an HTML comment ``<!-- ... -->``; the delimiters
    themselves count as part of the section.

    text -- the wikitext to scan.
    pm   -- iterable of character offsets into ``text``.

    Returns 1 if at least one offset falls inside such a section, else 0.
    """
    # The pattern must spell out the <nowiki> pair.  A bare ".*?" alternative
    # matches at every position and never identifies a real section.
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->', re.IGNORECASE | re.DOTALL)
    for match in nowikiOrHtmlCommentR.finditer(text):
        for p in pm:
            if match.start() <= p < match.end():
                return 1
    return 0

def prov_nal_sslk(ot,s):
    """Tell whether the text ``ot`` already contains a wiki link to ``s``.

    Four spellings are searched for: the plain link ``[[s]]`` and the piped
    link opening ``[[s|``, each with ``s`` passed through ``perv_upper`` and
    through ``perv_lower``.  The search itself is delegated to ``iskats_mn``
    starting at offset 0.

    Returns 1 when ``iskats_mn`` reports a position other than -1, else 0.
    """
    obrazcy = [
        shablon % registr(s)
        for shablon in (u'[[%s]]', u'[[%s|')
        for registr in (perv_upper, perv_lower)
    ]
    poz, _ = iskats_mn(ot, 0, obrazcy)
    return 1 if poz != -1 else 0


class Statis:
    """Counters describing what one bot run encountered and did.

    All counters start at zero and are incremented by rabota():

    nsozdali  -- a new redirect text was prepared for an empty/missing page
    nizm      -- a page got the "other meanings" note prepended
    nneizm    -- a page already linked to the target and was left alone
    nizmali   -- an existing redirect was re-pointed to the target
    nneizmali -- an existing redirect already pointed to the target
    nstrskb   -- skipped because the target was the page title plus skbtekst
    vhsta     -- input page existed and did not carry the Apartigilo template
    vhapa     -- input page already carried the Apartigilo template
    vhpust    -- input page text was at most two characters long
    vhnet     -- input page did not exist
    vhali     -- input page was a redirect
    """

    def __init__(self):
        # outcome counters
        self.nsozdali = self.nizm = self.nneizm = 0
        self.nizmali = self.nneizmali = 0

        self.nstrskb = 0

        # input classification counters
        self.vhsta = self.vhapa = self.vhpust = self.vhnet = self.vhali = 0

    def vivod(self):
        """Report every counter on one ``statis:`` line via the module-level vivod()."""
        imena = (
            'nsozdali', 'nizm', 'nneizm', 'nizmali', 'nneizmali',
            'nstrskb',
            'vhsta', 'vhapa', 'vhpust', 'vhnet', 'vhali',
        )
        # Build "name=%d  " for every counter, then fill all slots in one go.
        shablon = u'statis:  ' + u''.join([u'%s=%%d  ' % imja for imja in imena])
        znachenija = tuple([getattr(self, imja) for imja in imena])
        vivod(shablon % znachenija)


def zapis_dob(se,tt,f1):
    """Append one six-field record about ``se`` and ``tt`` to the file ``f1``.

    ``tt`` is first passed through ``perv_upper``.  The fields are, in order:
    ``se``, ``se``, ``tt``, the bold list item ``*'''[[tt]]'''``, ``tt`` and
    the literal ``l``.  They are combined by ``spisvstr`` with ``@`` as its
    second argument, a newline is appended, and the file is flushed.
    """
    tt = perv_upper(tt)
    polja = [se, se, tt, u"*'''[[%s]]'''" % tt, tt, u'l']
    f1.write(spisvstr(polja, u'@') + u'\n')
    f1.flush()

def rabota(tbl_pl,pl,kod,kim,statis):
    """Process one page ``pl`` so that it leads readers to the page ``kim``.

    tbl_pl -- lookup table handed to get_tbl() together with ``pl`` to obtain
              the page object that is actually read.
    pl     -- the page to inspect (and possibly edit).
    kod    -- term recorded via zapis_dob() and quoted in the inserted note.
    kim    -- title the page should redirect or link to.
    statis -- Statis instance whose counters are updated.

    Outcome by case:
      * page contains ``{{Apartigilo}}`` (also checked in lower case): its
        title is listed in ``otchna`` and nothing else happens;
      * page is a redirect that already targets ``kim``: nothing happens;
      * page is a redirect to something else: a record is written to
        ``otchda``; the redirect is rewritten only if the global ``fizmali``
        is set;
      * page is missing or has at most two characters: it becomes
        ``#REDIRECT [[kim]]``, unless ``kim`` equals the page title plus
        ``skbtekst``, in which case it is listed in ``otchna`` instead;
      * any other page: a record is written to ``otchda`` and, unless the
        text already links to ``kim``, a one-line note pointing to ``kim``
        is prepended.

    A prepared change is always dumped to ``otch2``; it is saved to the wiki
    only when the global ``fzapis`` is set.  The counters are printed at the
    end, but not on the early-return paths.
    """
    vivod(u'%s  %s\n'%(pl.title(),kim))
    si0=pl.site()   # NOTE(review): never read again in this function
    shapa=u'{{Apartigilo}}'

    fnov=0          # set on empty/missing pages; never read in this function
    fzam=0          # becomes 1 once a new page text has been prepared in ``ot``
    sali=u''        # redirect target when the page turns out to be a redirect

    try:
        pl=get_tbl(tbl_pl,pl)
        ot = pl.get()
        vivod(u'+ %s\n' % pl.title())
        # Copy of the text without nowiki/comment parts, used for the
        # template test below and for the link test further down.
        otnk=ubrkoment(ot)

        if len(ot)<=2:
            # Practically empty page: handled like a missing one.
            ot = u''
            vivod(u' <- pust\n')
            statis.vhpust+=1
            fnov=1

        elif (shapa in otnk) or (shapa.lower() in otnk):
            # The page already carries the template: only list it and stop.
            vivod(u' <- apa\n')
            otchna.write(u'* [[%s]]\n'% pl.title())
            otchna.flush()
            statis.vhapa+=1
            return

        else:
            vivod(u' <- ne apa\n')
            statis.vhsta+=1

    except wikipedia.NoPage:
        vivod(u'- %s\n' % pl.title())
        ot = u''
        statis.vhnet+=1
        fnov=1

    except wikipedia.IsRedirectPage:
        sali=pl.getRedirectTarget()
        vivod(u'+ %s\n-> %s\n' % (pl.title(),sali))
        ot = u''
        statis.vhali+=1
        # Open question left by the original author: should the function
        # return right here?  As written it continues with the checks below.

    if sali!=u'':
        # Targets are compared after perv_upper() on both sides.
        if perv_upper(sali)==perv_upper(kim):
            statis.nneizmali+=1
            return
        else:
            zapis_dob(kod,sali,otchda)
            # Existing redirects are re-pointed only when ``fizmali`` is set.
            if not fizmali:
                return

    if ot==u'':
        if kim==pl.title()+skbtekst:
            # The target is this very title plus the suffix: list the page
            # with a question mark instead of creating the redirect.
            vivod(u' <- ???\n')
            otchna.write(u'*? [[%s]]\n'% pl.title())
            otchna.flush()
            statis.nstrskb+=1
            return

        ot=u'#REDIRECT [[%s]]'%kim
        if sali!=u'':
            statis.nizmali+=1
            vivod(u'.izm ali -> %s\n'%kim)
        else:
            statis.nsozdali+=1
            vivod(u'.sozd ali -> %s\n'%kim)
        fzam=1
    else:
        zapis_dob(kod,pl.title(),otchda)

        if prov_nal_sslk(otnk,kim):
            # The page already links to the target: leave it unchanged.
            statis.nneizm+=1
            vivod(u'.neizm uzxe -> %s\n'%kim)
        else:
            # Prepend an italic note followed by a horizontal rule.
            # NOTE(review): the wording looks like Esperanto for "for other
            # meanings of <kod> see [[<kim>]]" - confirm.
            t=(u"''Pri la aliaj signifoj de %s rigardu en [[%s]].''\n----\n"%
                                   (kod,kim))
            statis.nizm+=1
            ot=t+ot
            vivod(u'.izm -> %s\n'%kim)
            fzam=1


    if fzam:

        # Always dump the prepared text to the second report file.
        otch2.write(u'%s\n+=======\n%s\n============\n' % (pl.title(),ot))
        otch2.flush()

        komm=u''
        try:
            # The wiki is written to only when ``fzapis`` is set.
            if fzapis:
                pl.put(ot, comment = komm, minorEdit = False)
            pass
        except wikipedia.EditConflict:
            vivod('!!!!EditConflict!!!!\n')
        except wikipedia.LockedPage:
            vivod('!!!!LockedPage!!!!\n')

    statis.vivod()
    return

def zapis_fimen(fimen,nomerf,t):
    """Append the line ``"<nomerf> <t>"`` to the file ``fimen`` and flush it.

    fimen  -- writable text file object.
    nomerf -- number written first (formatted with ``%d``).
    t      -- text written after a single space.
    """
    stroka = u'%d %s\n' % (nomerf, t)
    fimen.write(stroka)
    fimen.flush()

def provstop(fstop):
    """Tell whether the stop-marker file ``fstop`` exists.

    Returns the result of ``os.access`` with the ``os.F_OK`` existence test.
    """
    est = os.access(fstop, os.F_OK)
    return est

def ivsenmain():
    """Run the bot over the titles in the global list ``tblish``.

    Reads the module globals ``filename``, ``tblish`` and ``poslim``.

    Steps:
      1. Delete the stop file ``dob_apa_ali_stop.txt`` if it exists; if it
         still exists afterwards, give up immediately.
      2. Read ``<filename>.txt`` and take its last non-empty line.  The
         leading digits of that line give the previous run number, the text
         after the first space the last entry already processed.
      3. Open the four report files of this run (``_ot``, ``_op``, ``_na``,
         ``_da`` plus the six-digit run number) and publish them through the
         globals ``otch``, ``otch2``, ``otchna`` and ``otchda``.
      4. Walk ``tblish``: entries starting with ``#`` or not greater than the
         last processed entry are skipped; the walk ends at the first entry
         that is ``>= poslim`` (when ``poslim`` is not empty) or at the end
         of the list.
      5. Pages are collected into batches; each batch is passed to
         sozd_buf_tbl() and then every page goes through rabota().  After an
         entry's lower-cased page has been handled, the entry is appended to
         ``<filename>.txt`` via zapis_fimen().
      6. After each batch the stop file is checked again; if it exists the
         function returns.

    NOTE(review): none of the opened files is closed explicitly.
    """

    fstop='dob_apa_ali_stop.txt'
    # Remove a stop marker left from an earlier run, ignoring any failure.
    # NOTE(review): the bare ``except`` also swallows KeyboardInterrupt and
    # SystemExit, not only "file does not exist".
    try:
        os.unlink(fstop)
    except:
        pass

    # If the marker could not be removed, refuse to start.
    if provstop(fstop):
        wikipedia.output(u'stop0!\n')
        return

    n=u''
    skon=u''        # last non-empty line of the progress file
    nomerf=-1
    f=codecs.open(filename+'.txt','rb',encoding='utf-8')
    frl=f.readlines()

    mysite = wikipedia.getSite()

    i=0
    j=0
    fl1=1
    for s in frl:
        # 65279 is U+FEFF (byte order mark): drop it from the line start.
        if ord(s[0]) == 65279:
            s=s[1:]
        # presumably trims surrounding whitespace - verify in zbib_tekst
        s=ubr_nk_prob(s)
        if len(s)>=1:
            skon=s

    frl=[]
    f.close()
    # Parse the decimal run number at the start of the last line.
    nomerf=0
    i=0
    while (i<len(skon)) and skon[i].isdigit():
        nomerf=nomerf*10+ord(skon[i])-ord(u'0')
        i+=1
    # Everything after the first space is the last processed entry; the
    # fallback u'!' is used when iskat() reports -1.
    i=iskat(skon,u' ')
    if i!=-1:
        nomsta=skon[i+1:]
    else:
        nomsta=u'!'

    # This run gets the next number.
    nomerf+=1

    global otch,otch2,otchna,otchda
    otch = codecs.open(u'%s_ot%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otch2 = codecs.open(u'%s_op%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otchna = codecs.open(u'%s_na%06d.txt' % (filename,nomerf), 'w', 'utf-8')
    otchda = codecs.open(u'%s_da%06d.txt' % (filename,nomerf), 'w', 'utf-8')
#    otchup = codecs.open(u'%s_up%06d.txt' % (filename,nomerf), 'w', 'utf-8')

    # Progress file, reopened for appending one line per finished entry.
    fimen=codecs.open(filename+'.txt','a',encoding='utf-8')

    nkp=[]          # pages of the current batch
    ndan=[]         # parallel list of (kod, kim, write-progress flag)
    maxnkp=64       # a batch is processed once it holds at least this many pages

    statis=Statis()

    idan=0
    while 1:
        flkon=0     # 1 = no more input, process the final batch and leave
        if idan>=len(tblish):
            flkon=1
        else:
            kim=tblish[idan]
            vivod(u'%s\n'%kim)
            # Skip commented-out entries and entries not greater than the
            # last one recorded in the progress file.
            if kim.startswith(u'#') or kim<=nomsta:
                otch.write(u'prop %s  %s\n'%(kim,nomsta))
                idan+=1
                continue
            # Upper limit reached.
            if poslim!=u'' and kim>=poslim:
                vivod(u'poslim!\n')
                flkon=1

        if not flkon:
            kod=kim
            k1=u''
            # Strip the suffix, remembering the original-case base title.
            if kod.endswith(skbtekst):
                kod=kod[:len(kod)-len(skbtekst)]
                k1=kod
            k=kod.lower()

            # When the base title differs from its lower-case form, that
            # spelling is queued as well (flag 0: no progress line for it).
            if k1!=u'' and k1!=k:
                pla=wikipedia.Page(mysite,k1)
                nkp.append(pla)
                ndan.append((kod, kim, 0))

            pla=wikipedia.Page(mysite,k)
            nkp.append(pla)
            ndan.append((kod, kim, 1))

            # Keep collecting until the batch is full.
            if len(nkp)<maxnkp:
                idan+=1
                continue

        tbl={}
        # presumably loads the batch's pages into ``tbl`` for get_tbl() in
        # rabota() - verify in zget_buf
        sozd_buf_tbl(tbl,nkp,otch)

        vivod(u'\n\n********************************\n\n\n')

        i=0
        while i<len(nkp):
            pl=nkp[i]
            (kod, kim, zf)=ndan[i]
            rabota(tbl,pl,kod,kim,statis)
            otch.write(u'\n\n\n')
            otch.flush()
            # Record progress once the entry's main page has been handled.
            if zf:
                zapis_fimen(fimen,nomerf,kod)
            i+=1

        statis.vivod()
        vivod(u'\n\n********************************\n\n\n')
        nkp=[]
        ndan=[]
        if flkon:
            wikipedia.output(u'vsjo!\n')
            return
        # A stop file created during the run ends it after the current batch.
        if provstop(fstop):
            wikipedia.output(u'stop!\n')
            return
        idan+=1

# Script entry point.
# Command line: <list file> <progress file base name[.txt]> [options]
#   -s:<title>  stop at the first entry >= <title>
#   -ia...      also re-point existing redirects (sets fizmali)
#   -z...       really save pages to the wiki (sets fzapis)
wikipedia.setAction('')
tblish=chit_spis_u(sys.argv[1])

filename = sys.argv[2]
if filename.endswith(u'.txt'):
    # Drop the extension; the functions above append '.txt' themselves.
    filename=filename[:-4]

tbliwiob=[]
poslim = u''
fnorm=1         # cleared when an unknown argument is met
fizmali=0
fzapis=0
i=3
while i<len(sys.argv):
    arg=sys.argv[i]
    if arg.startswith(u'-s:'):
        poslim=arg[3:]
    elif arg.startswith(u'-ia'):
        fizmali=1
    elif arg.startswith(u'-z'):
        fzapis=1
    else:
        wikipedia.output(u'stran arg! %d="%s"\n'%(i,arg))
        fnorm=0
        break
    i+=1

if fnorm:
    # stopme() has to run whether ivsenmain() finishes or raises.
    try:
        ivsenmain()
    finally:
        wikipedia.stopme()


</nowiki>
 

Static Wikipedia (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2007 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2006 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu

Static Wikipedia February 2008 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu