# Материал из Википедии — свободной энциклопедии (From Wikipedia, the free encyclopedia)
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia
import re, sys
import codecs
import catlib
from zbib_tekst import *
def vivod(b):
    """Report the message `b` on the console and in the run log.

    The text is passed to ``wikipedia.output`` and also appended to the
    module-level report file ``otch``, which is flushed right away so the
    log stays current if the run is interrupted.
    """
    wikipedia.output(b)
    otch.write(b)
    otch.flush()
def korr_im_kat(t):
    """Normalize a category name to ``<category namespace>:<Name>`` form.

    The category namespace name is taken from the module-level ``mysite``
    for its language.  An existing ``<namespace>:`` prefix on `t` is removed
    first so that it is not duplicated.  ``perv_upper`` (from ``zbib_tekst``)
    is applied before the prefix test and again to the bare name; presumably
    it upper-cases the first character -- confirm against ``zbib_tekst``.

    Returns u'' when nothing is left after removing the prefix, otherwise the
    prefixed name.

    NOTE(review): the source listing had lost its indentation; the empty-name
    check is assumed to sit at function level, not inside the prefix branch.
    """
    katprefi = mysite.family.category_namespace(mysite.lang)
    t = perv_upper(t)
    if t.startswith(katprefi + u':'):
        # Drop "<namespace>:" (prefix plus the colon).
        t = t[len(katprefi) + 1:]
    t = perv_upper(t)
    if t == u'':
        return u''
    return katprefi + u':' + t
def _read_page(title):
    """Load the page `title`, log its status and dump its text to ``otch2``.

    Returns a ``(page, text)`` pair.  `text` is u'' when the page does not
    exist or is a redirect (both cases are only logged, not raised).
    """
    page = wikipedia.Page(mysite, title)
    wikipedia.output(u'\n' + page.title())
    text = u''
    try:
        text = page.get()
        vivod(u'+ %s\n' % page.title())
    except wikipedia.NoPage:
        vivod(u'- %s\n' % page.title())
    except wikipedia.IsRedirectPage:
        vivod(u'+ %s\n-> %s\n' % (page.title(), page.getRedirectTarget()))
    otch2.write(u'%s\n-=======\n%s\n============\n' % (page.title(), text))
    otch2.flush()
    return page, text


def _map_title(title, tish, tkon):
    """Translate `title` through the parallel old/new name lists.

    Every pair is tried in order without stopping at the first hit, so a
    name that was already replaced can be replaced again by a later pair.
    """
    for old, new in zip(tish, tkon):
        if title == old:
            title = new
    return title


def pereim(ish, kon, komm, fvih, tish, tkon):
    """Rename category `ish` to `kon` and record the pages that need fixing.

    Steps, in order:

    1. Both pages are read and logged.  If neither has text, a ``!!!net``
       line is logged and u'' is returned.
    2. If the target has no text, the source text is saved to the target with
       edit comment `komm`, and the source is overwritten with a pointer to
       the target followed by the ``{{forigu}}`` template.
    3. If the page with the same name in namespace 15 exists, it is moved to
       the matching target name.  (15 is the category talk namespace in
       MediaWiki's default numbering.)
    4. A work list is written to `fvih`: a header line, the titles of the
       category's articles, its subcategories and the pages that link to the
       source page, followed by a block of link-replacement patterns.
       Subcategory and referrer titles are translated through
       `tish` -> `tkon`.  The pattern block is presumably consumed by a
       follow-up replacement script -- confirm.

    Parameters:
        ish, kon -- source and target category titles; assumed to start with
            the local category namespace name, as produced by
            ``korr_im_kat`` (the prefix length is sliced off blindly).
        komm -- edit / move comment.
        fvih -- writable text file receiving the work list.
        tish, tkon -- parallel lists of all old and new titles in this run.

    Returns `ish`, or u'' when neither page has any text.

    NOTE(review): the source listing had lost its indentation; the nesting
    below (what belongs to the ``nt == u''`` branch, where each flush sits)
    was reconstructed and should be confirmed against a pristine copy.
    """
    pl, ot = _read_page(ish)
    pln, nt = _read_page(kon)
    if ot == u'' and nt == u'':
        vivod(u'!!!net %s -> %s\n' % (pl.title(), pln.title()))
        return u''

    jaz = mysite.lang
    katprefi = mysite.family.category_namespace(jaz)
    katdisprefi = mysite.family.namespace(jaz, 15)
    # Same base name under the talk namespace / the literal English prefix.
    ishdi = katdisprefi + ish[len(katprefi):]
    ishcat = u'Category' + ish[len(katprefi):]
    kondi = katdisprefi + kon[len(katprefi):]

    if nt == u'':
        # Target has no text yet: copy the description page over and leave a
        # pointer on the old page.
        vivod(u'%s -> %s\n' % (pl.title(), pln.title()))
        otch2.write(u'%s\n+=======\n%s\n============\n' % (pln.title(), ot))
        otch2.flush()
        pln.put(ot, comment=komm, minorEdit=False)
        ot2 = u'-> [[:%s]]\n{{forigu}}\n' % pln.title()
        otch2.write(u'%s\n+=======\n%s\n============\n' % (pl.title(), ot2))
        otch2.flush()
        # The new page text doubles as the edit comment.
        pl.put(ot2, comment=ot2, minorEdit=False)
        vivod(u'+ %s -> %s\n' % (ish, kon))

    pldi = wikipedia.Page(mysite, ishdi)
    if pldi.exists():
        vivod(u'+ %s\n' % pldi.title())
        vivod(u'%s -> %s\n' % (ishdi, kondi))
        if pldi.move(kondi, komm):
            vivod(u'+ %s -> %s\n' % (ishdi, kondi))
        else:
            vivod(u'- %s -> %s\n' % (ishdi, kondi))

    fvih.write(u'| [[%s]] -> [[%s]] %s\n' % (ish, kon, komm))
    fvih.flush()

    workingcat = catlib.Category(mysite, ish)
    members = workingcat.articles()
    if members:
        for ka in members:
            fvih.write(u'%s\n' % ka.title())
    fvih.flush()

    subcats = workingcat.subcategories()
    if subcats:
        for ka in subcats:
            fvih.write(u'%s\n' % _map_title(ka.title(), tish, tkon))
    fvih.flush()

    referrers = pl.getReferences()
    if referrers:
        for ka in referrers:
            kat = ka.title()
            ns = ka.namespace()
            # Skip odd-numbered namespaces and namespaces 2 and 4 (talk, user
            # and project pages in MediaWiki's default numbering); they are
            # only reported.
            if (ns & 1) == 1 or ns == 2 or ns == 4:
                vivod(u'! ns=%d %s\n' % (ns, kat))
                continue
            fvih.write(u'%s\n' % _map_title(kat, tish, tkon))
    fvih.flush()

    # Four link forms ([[X|, [[X]], [[:X|, [[:X]]), emitted once for the local
    # category name and once for the English "Category" spelling.
    shabl = (u'?=======\n1k\n-=======\n[[%s|\n+=======\n[[%s|\n'
             u'?=======\n1k\n-=======\n[[%s]]\n+=======\n[[%s]]\n'
             u'?=======\n1k\n-=======\n[[:%s|\n+=======\n[[:%s|\n'
             u'?=======\n1k\n-=======\n[[:%s]]\n+=======\n[[:%s]]\n')
    b = (shabl % ((ish, kon) * 4) +
         shabl % ((ishcat, kon) * 4) +
         u'============\n')
    fvih.write(b)
    fvih.flush()
    vivod(u'\n')
    return ish
def main(fivh, fivih):
    """Read the rename list from `fivh` and process every pair with pereim.

    Input format (UTF-8, one entry per line, fields separated by the first
    ``|``):

    * ``old|new`` -- a category to rename; both names are normalized with
      ``korr_im_kat``.
    * ``|comment`` -- an empty left field sets the edit comment used for all
      following pairs.

    Lines without a ``|`` are ignored.  A leading byte-order mark on a line
    is dropped.  ``ubr_nk_prob`` (from ``zbib_tekst``) is applied to the line
    and to each field; presumably it trims surrounding whitespace -- confirm.

    Before any renaming, three overviews are logged through ``vivod``: the
    old names and the new names as wiki bullet lists, and the old names as a
    space-separated list of links.  The work list produced by ``pereim`` goes
    to `fivih`.

    Parameters:
        fivh -- path of the input file.
        fivih -- path of the output work-list file (overwritten).
    """
    komm = u''
    tish = []
    tkon = []
    tkomm = []

    f = codecs.open(fivh, 'rb', encoding='utf-8')
    try:
        lines = f.readlines()
    finally:
        f.close()

    fvih = codecs.open(fivih, 'w', 'utf-8')
    try:
        for s in lines:
            if s.startswith(u'\ufeff'):
                s = s[1:]
            s = ubr_nk_prob(s)
            st = s.split(u'|', 1)
            if len(st) != 2:
                continue
            ish = ubr_nk_prob(st[0])
            kon = ubr_nk_prob(st[1])
            if ish == u'':
                # "|text" line: the comment for the pairs that follow.
                komm = kon
                continue
            tish.append(korr_im_kat(ish))
            tkon.append(korr_im_kat(kon))
            tkomm.append(komm)

        ishte = u''.join(u'* [[%s]]\n' % ish for ish in tish)
        konte = u''.join(u'* [[%s]]\n' % kon for kon in tkon)
        udlte = u''.join(u'[[:%s]] ' % ish for ish in tish)
        vivod(u'%s\n\n\n%s\n\n\n%s\n\n\n' % (ishte, konte, udlte))

        # pereim gets the full lists as well, so that it can translate the
        # names of other categories renamed in the same run.
        for ish, kon, pair_komm in zip(tish, tkon, tkomm):
            pereim(ish, kon, pair_komm, fvih, tish, tkon)
    finally:
        fvih.close()
# Script entry: the first command-line argument is the base name of the work
# files (a trailing ".txt" is accepted and stripped).
wikipedia.setAction('')
fi = wikipedia.argHandler(sys.argv[1], 'cht_cat')
if fi.endswith(u'.txt'):
    fi = fi[:-4]
fivh = fi + '.txt'        # input: rename list read by main()
fotch = fi + '_oi.txt'    # report written by vivod()
fotch2 = fi + '_op.txt'   # page-text dump written by pereim()
fivih = fi + '_z.txt'     # output work list

# NOTE(review): flzap is cleared by a second argument "n" but is never read
# in the visible code; presumably meant as a "do not write" switch -- confirm
# before relying on it.
flzap = 1
if len(sys.argv) >= 3 and sys.argv[2] == u'n':
    flzap = 0

# otch, otch2 and mysite are module globals used by the functions above.
otch = codecs.open(fotch, 'w', 'utf-8')
otch2 = codecs.open(fotch2, 'w', 'utf-8')
mysite = wikipedia.getSite()
try:
    main(fivh, fivih)
finally:
    # Runs on success and on any exception (which then propagates).
    wikipedia.stopme()
    otch.close()
    otch2.close()