# Material from Wikipedia, the free encyclopedia
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia, pagegenerators
import re, sys, os
import codecs
from zbib_tekst import *
from zget_buf import *
# Title suffix appended to a term to form the target page title.
# ivsenmain() strips it from a list entry to obtain the bare term, and
# rabota() refuses to create a redirect when the target is just the page's
# own title plus this suffix.
# NOTE(review): "apartigilo" is presumably the Esperanto word for a
# disambiguation page - confirm.
skbtekst=u' (apartigilo)'
def vivod(b):
    """Send the text *b* to ``wikipedia.output`` and to the main report file.

    The report file is the module-level ``otch`` object (opened in
    ``ivsenmain``); it is flushed after every write.
    """
    wikipedia.output(b)
    otchet = otch
    otchet.write(b)
    otchet.flush()
def ubrkoment(text):
    """Return *text* with nowiki spans and HTML comments removed.

    Every ``<nowiki>...</nowiki>`` span and every ``<!-- ... -->`` comment
    (matched case-insensitively, across line breaks) is cut out.  Any
    ``-->`` that is still left afterwards is rewritten as ``-- >``.
    """
    # The closing </nowiki> tag had been lost from this pattern, which made
    # the lazy '.*?' match nothing: only the opening tag was removed and the
    # protected text stayed in place.
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->',
                                      re.IGNORECASE | re.DOTALL)
    # Remove one span at a time and search again from the start, so a span
    # that only forms once an inner one has been cut out is removed as well.
    match = nowikiOrHtmlCommentR.search(text)
    while match:
        text = text[:match.start()] + text[match.end():]
        match = nowikiOrHtmlCommentR.search(text)
    # Break up any orphaned comment terminator that is left over.
    text = text.replace(u'-->', u'-- >')
    return text
def prov_v_koment(text, pm):
    """Tell whether any position in *pm* lies inside protected text.

    Protected text is a ``<nowiki>...</nowiki>`` span or an HTML comment
    ``<!-- ... -->`` of *text* (matched case-insensitively, across line
    breaks).  *pm* is a sequence of character offsets into *text*.

    Returns 1 if at least one offset falls inside such a span (start
    inclusive, end exclusive), otherwise 0.
    """
    # Both nowiki tags had been lost from this pattern, leaving a bare '.*?'
    # alternative that matches the empty string at every position; no offset
    # can lie inside an empty match, so the function always returned 0.
    nowikiOrHtmlCommentR = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->',
                                      re.IGNORECASE | re.DOTALL)
    for match in nowikiOrHtmlCommentR.finditer(text):
        for p in pm:
            if match.start() <= p < match.end():
                return 1
    return 0
def prov_nal_sslk(ot,s):
    """Return 1 if the text *ot* already contains a wiki link to *s*, else 0.

    Four link spellings are looked for: ``[[S]]`` and ``[[S|`` (the start of
    a piped link), each with *s* passed through ``perv_upper`` and through
    ``perv_lower``.
    NOTE(review): ``iskats_mn`` is presumably a "find any of these
    substrings" helper that yields -1 as the first result when nothing is
    found - confirm against zbib_tekst.
    """
    varianty = [shablon % prav(s)
                for shablon in (u'[[%s]]', u'[[%s|')
                for prav in (perv_upper, perv_lower)]
    (poz, kotoryj) = iskats_mn(ot, 0, varianty)
    if poz == -1:
        return 0
    return 1
class Statis:
    """Counters collected while pages are processed, plus a one-line report.

    Every counter starts at zero and is incremented by the caller (see
    ``rabota``).  ``tekst`` builds the report line and ``vivod`` sends it to
    the module-level ``vivod`` function.
    """

    # Counter names in the order they appear in the report line.  Both
    # __init__ and tekst() work from this single tuple, so a counter cannot
    # be initialised without being reported or the other way round.
    _POLJA = (
        'nsozdali',   # redirect text written for a page that was not a redirect
        'nizm',       # existing page changed: hatnote put in front of the text
        'nneizm',     # existing page left alone: link to the target already there
        'nizmali',    # existing redirect re-pointed to the target
        'nneizmali',  # existing redirect left alone: already points to the target
        'nstrskb',    # skipped: target is the page's own title plus skbtekst
        'vhsta',      # input page exists and lacks the {{Apartigilo}} template
        'vhapa',      # input page already carries the {{Apartigilo}} template
        'vhpust',     # input page text is at most 2 characters long
        'vhnet',      # input page does not exist (wikipedia.NoPage)
        'vhali',      # input page is a redirect (wikipedia.IsRedirectPage)
    )

    def __init__(self):
        """Create the counters, all set to zero."""
        for imja in self._POLJA:
            setattr(self, imja, 0)

    def tekst(self):
        """Return the report line ``u'statis: name=value name=value ... '``.

        Each ``name=value`` pair is followed by one space (so the line ends
        with a space) and there is no trailing newline.
        """
        pary = [u'%s=%d ' % (imja, getattr(self, imja))
                for imja in self._POLJA]
        return u'statis: ' + u''.join(pary)

    def vivod(self):
        """Write the report line through the module-level ``vivod``."""
        # Inside a method the bare name resolves to the module-level
        # function, not to this method.
        vivod(self.tekst())
def zapis_dob(se,tt,f1):
    """Write one six-field record about *se* and *tt* to the open file *f1*.

    *tt* is first passed through ``perv_upper``.  The fields are, in order:
    *se*, *se*, the converted title, a bold bulleted wiki link to that
    title, the converted title again, and the literal ``u'l'``.  The record
    is followed by a newline and *f1* is flushed.
    NOTE(review): ``spisvstr`` presumably joins the fields with the given
    separator (``u'@'``) - confirm against zbib_tekst.
    """
    nazvanie = perv_upper(tt)
    polja = [
        se,
        se,
        nazvanie,
        (u"*'''[[%s]]'''") % nazvanie,
        nazvanie,
        u'l',
    ]
    f1.write(spisvstr(polja, u'@') + u'\n')
    f1.flush()
# Process one candidate page `pl` for the target page title `kim`.
#
# Depending on what `pl` currently is, the function either writes a
# "#REDIRECT [[kim]]" text for it, or puts a hatnote pointing to `kim` in
# front of its existing text, or leaves it alone.  Every decision is logged
# through vivod() and counted in `statis`.
#
# Parameters:
#   tbl_pl - handed to get_tbl() together with `pl`
#            (presumably a table of prefetched pages - TODO confirm)
#   pl     - page object; title(), site(), get(), getRedirectTarget() and
#            put() are called on it
#   kod    - the bare term; used in the hatnote text and in zapis_dob records
#   kim    - title of the page that `pl` should lead to
#   statis - Statis counter object, updated in place
#
# Module globals read here: otchna, otchda, otch2 (report files), fizmali
# and fzapis (command-line flags), skbtekst.
#
# NOTE(review): the indentation of this file was lost; the nesting of the
# second `fzam=1` and of the final `statis.vivod()` cannot be recovered with
# certainty from the flattened text - restore them from the original script.
def rabota(tbl_pl,pl,kod,kim,statis):
vivod(u'%s %s\n'%(pl.title(),kim))
# si0 and fnov are assigned but never read inside this function.
si0=pl.site()
# Template text whose presence (as written or lower-cased) makes the page
# count as "apa" below.
shapa=u'{{Apartigilo}}'
fnov=0
# fzam: set to 1 once `ot` holds new text that should be saved.
fzam=0
# sali: redirect target of `pl`, or u'' when `pl` is not a redirect.
sali=u''
try:
pl=get_tbl(tbl_pl,pl)
ot = pl.get()
vivod(u'+ %s\n' % pl.title())
# otnk: page text with nowiki spans and HTML comments removed.
otnk=ubrkoment(ot)
# A text of at most 2 characters is treated as an empty page.
if len(ot)<=2:
ot = u''
vivod(u' <- pust\n')
statis.vhpust+=1
fnov=1
# The page already carries the template: list it in otchna and stop.
elif (shapa in otnk) or (shapa.lower() in otnk):
vivod(u' <- apa\n')
otchna.write(u'* [[%s]]\n'% pl.title())
otchna.flush()
statis.vhapa+=1
return
else:
vivod(u' <- ne apa\n')
statis.vhsta+=1
# Missing page: continue with empty text so a redirect can be created.
except wikipedia.NoPage:
vivod(u'- %s\n' % pl.title())
ot = u''
statis.vhnet+=1
fnov=1
# Existing redirect: remember its target in sali and continue.
except wikipedia.IsRedirectPage:
sali=pl.getRedirectTarget()
vivod(u'+ %s\n-> %s\n' % (pl.title(),sali))
ot = u''
statis.vhali+=1
#?????????????? return?
# return
# An existing redirect that already points to kim (compared after
# perv_upper) is left alone; any other redirect is recorded in otchda and
# only re-pointed when the fizmali flag is set.
if sali!=u'':
if perv_upper(sali)==perv_upper(kim):
statis.nneizmali+=1
return
else:
zapis_dob(kod,sali,otchda)
if not fizmali:
return
# Empty text (blank page, missing page or redirect): write a redirect.
if ot==u'':
# Skip when the target is just this page's own title plus skbtekst; the
# page is listed in otchna with a "*?" marker instead.
if kim==pl.title()+skbtekst:
vivod(u' <- ???\n')
otchna.write(u'*? [[%s]]\n'% pl.title())
otchna.flush()
statis.nstrskb+=1
return
ot=u'#REDIRECT [[%s]]'%kim
if sali!=u'':
statis.nizmali+=1
vivod(u'.izm ali -> %s\n'%kim)
else:
statis.nsozdali+=1
vivod(u'.sozd ali -> %s\n'%kim)
fzam=1
# Non-empty existing page: record it, then add a hatnote unless the text
# (without comments) already links to kim.
else:
zapis_dob(kod,pl.title(),otchda)
if prov_nal_sslk(otnk,kim):
statis.nneizm+=1
vivod(u'.neizm uzxe -> %s\n'%kim)
else:
# Italic one-line note naming kod and linking to kim, followed by a
# "----" line; it is put in front of the existing text.
t=(u"''Pri la aliaj signifoj de %s rigardu en [[%s]].''\n----\n"%
(kod,kim))
statis.nizm+=1
ot=t+ot
vivod(u'.izm -> %s\n'%kim)
fzam=1
# The new text is always logged to otch2; it is only saved to the wiki when
# the fzapis flag is set.  Edit conflicts and locked pages are reported
# and otherwise ignored.
if fzam:
otch2.write(u'%s\n+=======\n%s\n============\n' % (pl.title(),ot))
otch2.flush()
komm=u''
try:
if fzapis:
pl.put(ot, comment = komm, minorEdit = False)
pass
except wikipedia.EditConflict:
vivod('!!!!EditConflict!!!!\n')
except wikipedia.LockedPage:
vivod('!!!!LockedPage!!!!\n')
statis.vivod()
return
def zapis_fimen(fimen,nomerf,t):
    """Append the line ``'<nomerf> <t>'`` to the open file *fimen*.

    *nomerf* is formatted as a decimal integer; the file is flushed
    immediately after the write.
    """
    stroka = u'%d %s\n' % (nomerf, t)
    fimen.write(stroka)
    fimen.flush()
def provstop(fstop):
    """Return whether the path *fstop* exists, as reported by ``os.access``."""
    sushhestvuet = os.access(fstop, os.F_OK)
    return sushhestvuet
# Main driver of the script.
#
# Reads the progress file "<filename>.txt" to find the previous run number
# and the last entry handled, opens four numbered report files, then walks
# the module-level list `tblish`, collecting pages into batches and calling
# rabota() on every page of a batch.  The loop ends when the list is
# exhausted, when an entry reaches `poslim`, or when the stop file appears.
#
# Module globals read: filename, tblish, poslim, skbtekst.
# Module globals set:  otch, otch2, otchna, otchda (report files; they are
# never closed in this function).
#
# NOTE(review): the indentation of this file was lost; in particular it
# cannot be told from the flattened text whether `k1=kod` belongs inside
# `if kod.endswith(skbtekst):` - restore the nesting from the original.
def ivsenmain():
# Creating this file while the script runs asks it to stop after the
# current batch (it is checked via provstop() further down).
fstop='dob_apa_ali_stop.txt'
# Remove a stop file left over from an earlier run; any error (for example
# the file not existing) is ignored by the bare except.
try:
os.unlink(fstop)
except:
pass
if provstop(fstop):
wikipedia.output(u'stop0!\n')
return
# n, j and fl1 are assigned in this function but never read.
n=u''
skon=u''
nomerf=-1
f=codecs.open(filename+'.txt','rb',encoding='utf-8')
frl=f.readlines()
mysite = wikipedia.getSite()
i=0
j=0
fl1=1
# Keep the last line that is still non-empty after ubr_nk_prob() in skon.
# 65279 is U+FEFF, the byte-order mark, which is dropped when it leads a line.
# NOTE(review): ubr_nk_prob presumably strips surrounding whitespace - confirm.
for s in frl:
if ord(s[0]) == 65279:
s=s[1:]
s=ubr_nk_prob(s)
if len(s)>=1:
skon=s
frl=[]
f.close()
# Parse the leading decimal digits of that line as the previous run number.
nomerf=0
i=0
while (i<len(skon)) and skon[i].isdigit():
nomerf=nomerf*10+ord(skon[i])-ord(u'0')
i+=1
# Everything after the first space is the last entry already handled;
# u'!' is used when the line contains no space.
# NOTE(review): iskat presumably returns the index of the substring or -1.
i=iskat(skon,u' ')
if i!=-1:
nomsta=skon[i+1:]
else:
nomsta=u'!'
# This run gets the next number; it is part of every report file name.
nomerf+=1
global otch,otch2,otchna,otchda
otch = codecs.open(u'%s_ot%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otch2 = codecs.open(u'%s_op%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otchna = codecs.open(u'%s_na%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otchda = codecs.open(u'%s_da%06d.txt' % (filename,nomerf), 'w', 'utf-8')
# otchup = codecs.open(u'%s_up%06d.txt' % (filename,nomerf), 'w', 'utf-8')
# The progress file is reopened for appending; zapis_fimen() adds one
# line to it per finished entry.
fimen=codecs.open(filename+'.txt','a',encoding='utf-8')
# nkp: pages queued for the current batch; ndan: the matching
# (kod, kim, flag) tuples; maxnkp: batch size.
nkp=[]
ndan=[]
maxnkp=64
statis=Statis()
idan=0
while 1:
# flkon: 1 when this is the final pass (list exhausted or poslim reached).
flkon=0
if idan>=len(tblish):
flkon=1
else:
kim=tblish[idan]
vivod(u'%s\n'%kim)
# Skip lines starting with '#' and entries that do not compare greater
# than the last entry recorded in the progress file.
# NOTE(review): this presumably relies on tblish being sorted - confirm.
if kim.startswith(u'#') or kim<=nomsta:
otch.write(u'prop %s %s\n'%(kim,nomsta))
idan+=1
continue
# Stop at the first entry that compares >= the -s: limit.
if poslim!=u'' and kim>=poslim:
vivod(u'poslim!\n')
flkon=1
if not flkon:
# kod: the entry with the skbtekst suffix removed; k: kod lower-cased.
kod=kim
k1=u''
if kod.endswith(skbtekst):
kod=kod[:len(kod)-len(skbtekst)]
k1=kod
k=kod.lower()
# When k1 is set and differs from the lower-cased form, a page for it is
# queued as well, with flag 0.
if k1!=u'' and k1!=k:
pla=wikipedia.Page(mysite,k1)
nkp.append(pla)
ndan.append((kod, kim, 0))
# The lower-cased page is always queued, with flag 1.
pla=wikipedia.Page(mysite,k)
nkp.append(pla)
ndan.append((kod, kim, 1))
# Keep collecting until the batch holds at least maxnkp pages.
if len(nkp)<maxnkp:
idan+=1
continue
# Process the collected batch.
# NOTE(review): sozd_buf_tbl presumably fills tbl with the pages in nkp
# fetched in one request - confirm against zget_buf.
tbl={}
sozd_buf_tbl(tbl,nkp,otch)
vivod(u'\n\n********************************\n\n\n')
i=0
while i<len(nkp):
pl=nkp[i]
(kod, kim, zf)=ndan[i]
rabota(tbl,pl,kod,kim,statis)
otch.write(u'\n\n\n')
otch.flush()
# Only flag-1 items write a progress line, so each entry is recorded once.
if zf:
zapis_fimen(fimen,nomerf,kod)
i+=1
statis.vivod()
vivod(u'\n\n********************************\n\n\n')
nkp=[]
ndan=[]
if flkon:
wikipedia.output(u'vsjo!\n')
return
# The stop file is checked once per batch.
if provstop(fstop):
wikipedia.output(u'stop!\n')
return
idan+=1
# Script entry: parse the command line, then run ivsenmain().
#
# Usage, as read from the code below:
#   argv[1]      - file read by chit_spis_u() into the work list `tblish`
#   argv[2]      - base name of the progress and report files
#                  (a trailing ".txt" is removed)
#   -s:<limit>   - sets `poslim`, the entry at which ivsenmain() stops
#   -ia          - sets `fizmali`; rabota() then re-points existing redirects
#   -z           - sets `fzapis`; rabota() then actually saves pages
# Any other argument is reported and the run is not started.
#
# NOTE(review): the indentation of this file was lost, so the final `else:`
# could bind either to the `try` (run stopme() after a successful run) or
# to `if fnorm:` (run stopme() when the arguments were rejected) - restore
# the nesting from the original script.
wikipedia.setAction('')
tblish=chit_spis_u(sys.argv[1])
filename = sys.argv[2]
if filename.endswith(u'.txt'):
filename=filename[:len(filename)-4]
# tbliwiob is assigned here but not used anywhere in the visible code.
tbliwiob=[]
poslim = u''
# fnorm: stays 1 while every argument has been recognised.
fnorm=1
fizmali=0
fzapis=0
# Options start at argv[3].
i=3
while i<len(sys.argv):
if sys.argv[i].startswith(u'-s:'):
poslim=sys.argv[i][3:]
i+=1
elif sys.argv[i].startswith(u'-ia'):
fizmali=1
i+=1
elif sys.argv[i].startswith(u'-z'):
fzapis=1
i+=1
else:
wikipedia.output(u'stran arg! %d="%s"\n'%(i,sys.argv[i]))
fnorm=0
break
if fnorm:
# On any exception stopme() is called before the exception is re-raised.
try:
ivsenmain()
except:
wikipedia.stopme()
raise
else:
wikipedia.stopme()
# </nowiki>