# From Wikipedia, the free encyclopedia (page banner captured when this script was copied from a wiki page; not part of the program).
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia, pagegenerators
import re, sys, os
import codecs
import zperevod as perevod
import zimagetransfer1 as imagetransfer1
from zbib_tekst import *
from zget_buf import *
from zbib_kateg import *
# Switch: when non-zero, images found in the interwiki articles are collected
# as candidates for the local article.
gl_dobav_iz = 0

# Localised phrases, keyed by language code and resolved at run time with
# wikipedia.translate(site, table).

# Nouns used in the edit summary after a count ("+ 3 <images>").
te_bildoj = {
    'ru': u'изображений',
    'eo': u'bildoj',
}
te_ekslj = {
    'ru': u'внешних ссылок',
    'eo': u'eksteraj ligiloj',
}
te_komunejoj = {
    'ru': u'викискладов',
    'eo': u'komunejoj',
}
te_ligoj = {
    'ru': u'ссылок',
    'eo': u'ligoj',
}
te_tabeloj = {
    'ru': u'таблиц',
    'eo': u'tabeloj',
}
te_kategorioj = {
    'ru': u'категорий',
    'eo': u'kategorioj',
}

# "see discussion" remark appended to the edit summary.
te_vi_di = {
    'ru': u'см. обсуждение',
    'eo': u'vidu diskuton',
}
te_diskuto = {
    'ru': u'обсуждение',
    'eo': u'diskuto',
}

# Section headings / template names for material placed on the additions page.
te_al_bildoj = {
    'ru': u'Дополнительные изображения',
    'eo': u'Aldonaj bildoj',
}
te_al_ekslj = {
    'ru': u'Дополнительные внешние ссылки',
    'eo': u'Aldonaj eksteraj ligiloj',
}

# Name of the additions sub-page and the category link written onto it.
te_aldon = {
    'ru': u'Добавления',
    'eo': u'Aldonaĵoj',
}
te_aldon_kat = {
    'ru': u'Категория:Добавления для статей',
    'eo': u'Kategorio:Aldonajxoj por artikoloj',
}
def vivod(b):
    """Send the message *b* to the console and to the run report.

    The text goes through ``wikipedia.output`` and is also written to the
    module-level report file ``otch``, which is flushed straight away.
    """
    wikipedia.output(b)
    report = otch
    report.write(b)
    report.flush()
def ubrkoment (text):
    """Return *text* without ``<nowiki>...</nowiki>`` spans and HTML comments.

    Matches are removed one at a time, always restarting the search from the
    beginning of the shortened text.  Any ``-->`` left over afterwards (a
    comment terminator without an opener) is rewritten as ``-- >``.
    """
    # The nowiki alternative must include its closing tag; without it the
    # lazy ``.*?`` matches nothing and only the opening tag would be removed.
    nowiki_or_comment = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->',
                                   re.IGNORECASE | re.DOTALL)
    match = nowiki_or_comment.search(text)
    while match:
        text = text[:match.start()] + text[match.end():]
        match = nowiki_or_comment.search(text)
    return text.replace(u'-->', u'-- >')
def prov_v_koment (text,pm):
    """Tell whether any position in *pm* lies inside a nowiki span or comment.

    *pm* is an iterable of character offsets into *text*.  Returns 1 when at
    least one offset falls within a ``<nowiki>...</nowiki>`` span or an HTML
    comment (start inclusive, end exclusive), otherwise 0.
    """
    # The pattern needs the full <nowiki>...</nowiki> alternative: a bare
    # ``.*?`` alternative matches the empty string at every position, which
    # can never contain an offset, so the check would always answer 0.
    nowiki_or_comment = re.compile(r'<nowiki>.*?</nowiki>|<!--.*?-->',
                                   re.IGNORECASE | re.DOTALL)
    for match in nowiki_or_comment.finditer(text):
        for p in pm:
            if match.start() <= p < match.end():
                return 1
    return 0
def provcifr(t):
    """Return True when the first character of *t* is an ASCII digit 0-9.

    An empty string yields False (the earlier version raised IndexError).
    """
    return len(t) > 0 and u'0' <= t[0] <= u'9'
def prov_te_format_kart(t):
    """Classify one ``|``-separated argument of an image link.

    Returns
        1 -- a layout keyword (thumb, thumbnail, frame, left, center, right)
             or a ``<digits>px`` width of at least 50,
        2 -- a ``<digits>px`` width below 50,
        0 -- anything else (treated by the caller as caption text).
    """
    min_width = 50
    t = ubr_nk_prob(t)
    if t in (u'thumb', u'thumbnail', u'frame'):
        return 1
    if t in (u'left', u'center', u'right'):
        return 1
    if len(t) < 3:
        return 0
    # Read up to six leading digits as the width.
    width = 0
    ndigits = 0
    for ch in t[:6]:
        if not provcifr(ch):
            break
        width = width * 10 + ord(ch) - ord(u'0')
        ndigits += 1
    if ndigits < 1 or t[ndigits:] != u'px':
        return 0
    if width < min_width:
        return 2
    return 1
def linkedPages_podp(site, thistxt):
    """Collect the ordinary wiki links of a text together with their labels.

    Language links, category links, nowiki spans and HTML comments are
    stripped first.  Returns a pair of parallel lists: the linked pages as
    Page objects, and for each of them the text following the first ``|``
    of the link (empty when the link has no ``|``).  The label end is
    located with ``iskkonpodp``, so it may extend past the first ``]]``
    when brackets are nested -- presumably to cover image captions that
    contain links.
    """
    thistxt = wikipedia.removeLanguageLinks(thistxt)
    thistxt = wikipedia.removeCategoryLinks(thistxt, site)
    thistxt = ubrkoment(thistxt)
    # [[(?P<title>[^]|]*)(|[^]]*)?]]
    link_re = re.compile(r'\[\[(?P<title>[^\]\|]*)(\|[^\]]*)?\]\]')
    pages = []
    labels = []
    for m in link_re.finditer(thistxt):
        page = wikipedia.Page(site, m.group('title'))
        label = u''
        bar = iskat(m.group(0), u'|')
        if bar != -1:
            label_start = m.start() + bar + 1
            label_len = iskkonpodp(thistxt[label_start:], 0, u'[[', u']]')
            if label_len != -1:
                label = thistxt[label_start:label_start + label_len]
        pages.append(page)
        labels.append(label)
    return (pages, labels)
def razb_arg(t):
    """Split *t* at ``|`` (as located by ``iskats``) into trimmed pieces.

    Every piece is passed through ``ubr_nk_prob``.  An empty *t* gives an
    empty list, and a trailing ``|`` does not produce a final empty piece.
    """
    pieces = []
    total = len(t)
    start = 0
    while start < total:
        end = iskats(t, start, u'|')
        if end == -1:
            end = total
        pieces.append(ubr_nk_prob(t[start:end]))
        start = end + 1
    return pieces
def image_podp(site, thistxt):
    """
    Gives the images the page shows, as a list of imagetransfer1.Izobr
    objects.  This includes images in galleries.

    For every image the namespace prefix is cut from the name, and layout
    arguments (thumb, left, NNNpx, ...) are removed from the caption.  An
    image displayed narrower than 50px gets ``tip = 100``.
    """
    images = []

    # Find normal images
    pages, labels = linkedPages_podp(site, thistxt)
    for page, label in zip(pages, labels):
        if page.isImage():
            images.append(imagetransfer1.Izobr(page.title(), site.lang, label))

    # Find images in galleries
    gallery_re = re.compile('<gallery>.*?</gallery>', re.DOTALL)
    # Not used below; kept because building it queries the site's namespaces
    # exactly as before.
    galleryEntryR = re.compile('(?P<title>(%s|%s):.+?)(\|.+)?\n' % (site.image_namespace(), site.family.image_namespace(code = '_default')))
    for gallery in gallery_re.findall(thistxt):
        for line in gallery.split(u'\n'):
            bar = iskat(line, u'|')
            if bar == -1:
                label = u''
                name = ubr_nk_prob(line)
            else:
                label = ubr_nk_prob(line[bar + 1:])
                name = ubr_nk_prob(line[:bar])
            if len(name) < 1:
                continue
            page = wikipedia.Page(site, name)
            if not page.isImage():
                continue
            images.append(imagetransfer1.Izobr(page.title(), site.lang, label))

    # Normalise names and captions
    for img in images:
        name = img.iz
        colon = iskat(name, u':')
        if colon != -1:
            img.iz = name[colon + 1:]
        caption_parts = []
        for arg in razb_arg(img.podp):
            kind = prov_te_format_kart(arg)
            if kind == 0:
                caption_parts.append(arg)
            elif kind == 2:
                img.tip = 100
                vivod(u' - m px %s\n' % img.iz)
                break
        img.podp = u'|'.join(caption_parts)
    return images
class Statis:
    """Counters for the statistics line logged after each saved article.

    ``ob`` counts saved articles; the others count the saved articles that
    gained images (``iz``), Commons templates (``shcom``), external links
    (``eksl``), links (``ssilk``), tables (``tabl``) and categories (``kat``).
    """

    def __init__(self):
        self.ob = self.iz = self.shcom = self.eksl = 0
        self.ssilk = self.tabl = self.kat = 0
class Ssilk:
    """A wiki link: target ``ss``, language code ``jaz`` and label ``podp``."""

    def __init__(self, ss, jaz, podp):
        self.ss, self.jaz, self.podp = ss, jaz, podp

    def sravn(self, e1):
        """Compare targets; the result is true when they DIFFER."""
        other_target = e1.ss
        return self.ss != other_target
def isk_ssilk(site, thistxt):
    """Return every non-image link of *thistxt* as a list of Ssilk objects."""
    pages, labels = linkedPages_podp(site, thistxt)
    links = []
    for page, label in zip(pages, labels):
        if page.isImage():
            continue
        links.append(Ssilk(page.title(), site.lang, label))
    return links
def isk_ssilk_smt(site, thistxt):
    """Collect the leading link of each list-like line of *thistxt*.

    Only lines whose first ``[[`` starts within the first five characters
    are considered, and from each such line only the first link is taken
    (images are skipped).  Returns a list of Ssilk objects.
    """
    links = []
    for line in thistxt.split(u'\n'):
        first = iskats(line, 0, u'[[')
        if first == -1 or first > 4:
            continue
        pages, labels = linkedPages_podp(site, line)
        if len(pages) < 1:
            continue
        if not pages[0].isImage():
            links.append(Ssilk(pages[0].title(), site.lang, labels[0]))
    return links
def perv_upper(t):
    """Return *t* with its first character upper-cased; the rest is untouched."""
    if len(t) == 0:
        return u''
    head, tail = t[0], t[1:]
    return head.upper() + tail
class Shabl:
    """A template call: name ``nazv`` (first letter upper-cased), language
    code ``jaz``, argument list ``arg`` and the flag ``neisp`` (non-zero
    means "do not use")."""

    def __init__(self, nazv, jaz, arg):
        self.nazv = perv_upper(nazv)
        self.jaz = jaz
        self.neisp = 0
        self.arg = list(arg)  # own copy, the caller's sequence is not kept

    def sravn(self, e1):
        """Return 0 when name and all arguments are equal, otherwise 1."""
        if len(self.arg) != len(e1.arg):
            return 1
        if self.nazv != e1.nazv:
            return 1
        for mine, theirs in zip(self.arg, e1.arg):
            if mine != theirs:
                return 1
        return 0

    def sravn_com(self, e1):
        """Return 0 when both calls have the same name and the same first
        argument (ignoring first-letter case and ``_`` versus space),
        otherwise 1.  A call without arguments never compares equal."""
        if len(self.arg) < 1 or len(e1.arg) < 1:
            return 1
        if self.nazv != e1.nazv:
            return 1
        mine = perv_upper(self.arg[0]).replace(u'_', u' ')
        theirs = perv_upper(e1.arg[0]).replace(u'_', u' ')
        if mine != theirs:
            return 1
        return 0

    def tekst(self):
        """Return the call as wikitext with all arguments."""
        pieces = [u'{{' + self.nazv]
        for a in self.arg:
            pieces.append(u'|' + a)
        pieces.append(u'}}')
        return u''.join(pieces)

    def tekstn(self, n):
        """Return the call as wikitext with at most the first *n* arguments."""
        pieces = [u'{{' + self.nazv]
        for a in self.arg[:max(n, 0)]:
            pieces.append(u'|' + a)
        pieces.append(u'}}')
        return u''.join(pieces)
def isk_shabl(jaz, t, z):
    """Find the calls of the templates named in *z* inside the text *t*.

    A name matches as given in *z* or with its first letter lower-cased.
    After a ``{{ ... }}`` pair has been examined, scanning resumes behind
    its closing braces.  Returns a list of Shabl objects carrying the
    language code *jaz*.
    """
    lowered = [name[0].lower() + name[1:] for name in z]
    found = []
    pos = 0
    while pos < len(t):
        opening = iskats(t, pos, u'{{')
        if opening == -1:
            break
        inner = opening + 2
        closing = iskkonpodp(t, inner, u'{{', u'}}')
        if closing == -1:
            break
        parts = razb_arg(t[inner:closing])
        if len(parts) >= 1:
            name = parts[0]
            if name in z or name in lowered:
                found.append(Shabl(name, jaz, parts[1:]))
        pos = closing + 2
    return found
# korr_com_shabl: normalise a Commons / Commonscat template object `sh` in place.
#   - a call without arguments is flagged as unusable (sh.neisp = 1);
#   - `Commonscat` is renamed to `Commons` and `Category:` is put in front of
#     its first argument;
#   - templates other than `Commons` are left alone;
#   - for `Commons`: a colon in front of `Category:` is dropped, the first
#     letter after the `Category:` prefix is upper-cased via perv_upper, and
#     underscores in the first argument become spaces.
# Returns None.
# NOTE(review): indentation was lost in this copy, so it cannot be told whether
# the final underscore replacement applies to every Commons target or only to
# `Category:` targets -- confirm against the original script.
def korr_com_shabl(sh):
if len(sh.arg)<1:
sh.neisp=1
# vivod(u' <-shcom len(arg)=0\n')
return
if sh.nazv==u'Commonscat':
sh.nazv=u'Commons'
sh.arg[0]=u'Category:'+sh.arg[0]
if sh.nazv!=u'Commons':
return
if sh.arg[0].startswith(u':Category:'):
sh.arg[0]=sh.arg[0][1:]
if sh.arg[0].startswith(u'Category:'):
# 9 == len(u'Category:')
sh.arg[0]=sh.arg[0][:9]+perv_upper(sh.arg[0][9:])
sh.arg[0]=sh.arg[0].replace(u'_',u' ')
class Eksl:
    """An external link: address ``adr``, language code ``jaz`` and label
    ``podp``, plus bookkeeping fields used while candidates are merged."""

    def __init__(self, adr, jaz, podp):
        # Lower-case everything up to the first '/' found from offset 8 on
        # (scheme and host part); the rest of the address keeps its case.
        host_end = iskats(adr, 8, '/')
        if host_end == -1:
            host_end = len(adr)
        self.adr = adr[:host_end].lower() + adr[host_end:]
        self.jaz = jaz
        self.podp = podp
        self.podp2 = u''
        self.tip = 100  # values >= 80 mean "must not be used"
        self.tip2 = 100
        self.prioritet = 0
        self.njazisp = 1
        self.njazisp2 = 0

    def vz_prioritet(self):
        """Sort key: the priority of this link."""
        return self.prioritet

    def sravn(self, e1):
        """Compare addresses; the result is true when they DIFFER."""
        other_adr = e1.adr
        return self.adr != other_adr

    def tekst(self):
        """Address followed by the original label."""
        return u'%s %s' % (self.adr, self.podp)

    def pertekst(self):
        """Address followed by the text of ``self.perpodp`` (an attribute the
        caller has to set beforehand)."""
        return u'%s %s' % (self.adr, self.perpodp.t)
def isk_eksl(jaz, t):
    """Collect the external links of the text *t* as Eksl objects.

    Every occurrence of ``http://`` is taken.  For a bracketed link the
    label runs to the bracket end found by ``iskkonpodp``, otherwise to
    the end of the line.  The address stops at the first separator found
    by ``iskats_mn`` (space, ``}``, ``)``, ``<`` or ``>``).  Calls of the
    ``El2`` template are added as well, with ``http://`` put in front of
    the first argument.  Trailing slashes are removed from all addresses.
    """
    links = []
    pos = 0
    while pos < len(t):
        start = iskats(t, pos, u'http://')
        if start == -1:
            break
        bracketed = ((start > 0 and t[start - 1] == u'[') or
                     (start > 1 and t[start - 2:start] == u'[ '))
        end = -1
        if bracketed:
            end = iskkonpodp(t, start, u'[', u']')
        if end == -1:
            # bare link, or the bracket is never closed: stop at line end
            end = iskats(t, start, u'\n')
            if end == -1:
                end = len(t)
        (adr_end, which) = iskats_mn(t[:end], start, [u' ', u'}', u')', u'<', u'>'])
        if adr_end == -1:
            adr_end = end
        links.append(Eksl(t[start:adr_end], jaz, t[adr_end + 1:end]))
        pos = adr_end

    for sh in isk_shabl(jaz, t, (u'El2',)):
        if len(sh.arg) < 1:
            continue
        if len(sh.arg) >= 2:
            label = sh.arg[1]
        else:
            label = u''
        links.append(Eksl(u'http://' + sh.arg[0], jaz, label))

    for link in links:
        link.adr = link.adr.rstrip(u'/')
    return links
class Tabl:
    """A table cut out of an article: language code ``jaz``, wikitext ``te``."""

    def __init__(self, jaz, te):
        self.jaz, self.te = jaz, te

    def sravn(self, e1):
        """Compare the table texts; the result is true when they DIFFER."""
        other_text = e1.te
        return self.te != other_text

    def tekst(self):
        """Return the table text followed by a newline."""
        return u'%s\n' % self.te
def isk_tabl(jaz, t):
    """Cut all tables (``{| ... |}`` and ``<table ... </table>``) out of *t*.

    The matching end marker is located with ``iskkonpodp``.  Scanning stops
    at the first table whose end marker cannot be found.  Returns a list of
    Tabl objects, each holding the text from start marker to end marker
    inclusive.
    """
    openers = [u'{|', u'<table']
    closers = [u'|}', u'</table>']
    tables = []
    pos = 0
    while pos < len(t):
        (start, which) = iskats_mn(t, pos, openers)
        if start == -1:
            break
        opener = openers[which]
        closer = closers[which]
        end = iskkonpodp(t, start + len(opener), opener, closer)
        if end == -1:
            break
        end += len(closer)
        tables.append(Tabl(jaz, t[start:end]))
        pos = end
    return tables
# Section headings per language code.  The first entry of each tuple is the
# one written when a new section is created (see gl_zagolov); all entries are
# recognised when an existing section is searched for (see isk_zagolov).

# "External links" headings.
t_zag_eksl = {
    u'eo': (
        u'Eksteraj ligiloj',
        u'Ekstera ligilo',
        u'Eksteraj ligoj',
        u'Ekstera ligo',
        u'Rete',
    ),
    u'io': (u'Externa ligili', u'Extera ligili'),
    u'ru': (u'Внешние ссылки', u'Внешняя ссылка'),
}

# "See also" headings.
t_zag_vian = {
    u'de': (u'Siehe auch', u'Weitere themen'),
    u'en': (u'See also', u'Miscellaneous topics'),
    u'eo': (u'Vidu ankaux jenon:', u'Vidu ankaux jenon', u'Vidu ankaux'),
    u'fr': (u'Voir aussi',),
    u'io': (u'Videz anke',),
    u'nl': (u'Zie ook',),
    u'ru': (u'См. также', u'Смотри также', u'Смотрите также'),
}
def gl_zagolov(jaz, zag):
    """Return the primary heading for language *jaz* from the table *zag*.

    *zag* maps language codes to tuples of heading variants; the first
    variant is the primary one.  ``u'*'`` is returned for a language that
    is not in the table.
    """
    # ``in`` replaces the deprecated dict.has_key(), which no longer exists
    # in Python 3.
    if jaz not in zag:
        return u'*'
    return zag[jaz][0]
def isk_zagolov(t, jaz, zag):
    """Find the last line of *t* that is a heading listed in *zag* for *jaz*.

    The text is walked line by line from the end.  A line qualifies when it
    contains one of the heading variants (case-insensitively), has no
    other alphanumeric character before or after the variant, and none of
    the variant's start, its end and the line end lies inside a comment or
    nowiki span (``prov_v_koment``).

    Returns ``(start, end)`` of the qualifying line, or
    ``(len(t), len(t))`` when there is none or *jaz* is not in *zag*.
    Line boundaries come from ``iskato``, presumably a backward search
    returning -1 when nothing is found -- confirm in zbib_tekst.
    """
    not_found = (len(t), len(t))
    # ``in`` replaces the deprecated dict.has_key(), which no longer exists
    # in Python 3.
    if jaz not in zag:
        return not_found
    lowered = t.lower()
    variants = zag[jaz]
    line_end = len(t)
    while line_end > 1:
        line_start = iskato(t, line_end - 2, u'\n') + 1
        for variant in variants:
            hit = iskats(lowered[:line_end], line_start, variant.lower())
            if hit == -1:
                continue
            hit_end = hit + len(variant)
            rejected = 0
            for c in t[line_start:hit]:
                if c.isalnum():
                    rejected = 1
            for c in t[hit_end:line_end]:
                if c.isalnum():
                    rejected = 1
            if prov_v_koment(t, (hit, hit_end, line_end)):
                rejected = 1
            if not rejected:
                return (line_start, line_end)
        line_end = line_start
    return not_found
def isk_kon_zagolov(t, p0, p1):
    """Return the offset where the section whose heading line is
    ``t[p0:p1]`` ends.

    The heading level is the number of ``=`` characters starting at *p0*
    (100 when there are none).  The section ends at the start of the next
    line that begins with at most that many ``=``; ``len(t)`` is returned
    when no such line follows.
    """
    level = 0
    while p0 + level < len(t) and t[p0 + level] == u'=':
        level += 1
    if level == 0:
        level = 100
    # Step back over the heading's own newline so that a heading directly
    # following it is found by the u'\n=' search.
    if p1 > 0 and t[p1 - 1] == u'\n':
        p1 -= 1
    pos = p1
    while True:
        pos = iskats(t, pos, u'\n=')
        if pos == -1:
            return len(t)
        pos += 1
        depth = 1
        while pos + depth < len(t) and t[pos + depth] == u'=':
            depth += 1
        if depth <= level:
            return pos
class Perev_podp:
    """A (translated) caption: the text ``t`` plus its translation
    statistics ``ps``, which start out as a fresh ``perevod.Perev_stat``."""

    def __init__(self, t):
        stats = perevod.Perev_stat()
        self.t = t
        self.ps = stats
def objed_podp(podp, podp1):
    """Pick the better of two captions.

    The score of a caption is ``ps.nup + ps.nvap``.  *podp1* wins only
    with a strictly higher score; on a tie *podp* is kept.
    """
    score = podp.ps.nup + podp.ps.nvap
    score1 = podp1.ps.nup + podp1.ps.nvap
    if score < score1:
        return podp1
    return podp
def perevod_ob(slov, vhjaz, vihjaz, podp):
    """Translate the caption *podp* from language *vhjaz* to *vihjaz*.

    An empty caption (or a single space) yields an empty Perev_podp.
    Otherwise the text and the statistics returned by
    ``perevod.perevod_stat`` are wrapped in a Perev_podp.
    """
    if podp in (u'', u' '):
        return Perev_podp(u'')
    (translated, stats) = perevod.perevod_stat(slov, vhjaz, vihjaz, podp)
    result = Perev_podp(translated)
    result.ps = stats
    return result
def prov_tekst_uzxe(t, s):
    """Return True when ``iskats`` finds *s* somewhere in *t*."""
    #X!!!
    position = iskats(t, 0, s)
    return position != -1
# prov_dobav_katcom1: try the name `n` as a Commons category.
#   n is trimmed with ubr_nk_prob, its first letter is upper-cased, and
#   anything from the first '#' on is cut off.  When u'Category:<n>' is a key
#   of nscom and n is not a key of zapriz, the line u'[[Category:<n>]]\n' is
#   appended to the list katcomsp unless it is already there.
#   Returns 1 or 0; progress is logged through vivod.
# NOTE(review): indentation was lost in this copy.  It cannot be told whether
# 1 is returned whenever the category is known and allowed, or only when the
# line was newly appended -- confirm against the original script.
def prov_dobav_katcom1(nscom,zapriz,katcomsp,n):
n=perv_upper(ubr_nk_prob(n))
vivod(u' prov_dobav_katcom1 %s\n'%n)
# drop a section anchor ("#...") from the name
p=iskats(n,0,u'#')
if p!=-1:
n=n[:p]
vivod(u' prov_dobav_katcom1# %s\n'%n)
t=u'Category:'+n
if nscom.has_key(t) and (not zapriz.has_key(n)):
t1=u'[['+t+u']]\n'
if not t1 in katcomsp:
katcomsp.append(t1)
vivod(u' <-- ++\n')
return 1
return 0
def prov_dobav_katcom(nscom, zapriz, katcomsp, vhs, jaz):
    """Try *vhs* and, for English names, its plural forms as Commons categories.

    Every candidate is handed to ``prov_dobav_katcom1``, which appends the
    matching category line to *katcomsp*.  For ``jaz == u'en'`` the
    candidates are *vhs* itself and then either ``...ies`` (name ending in
    ``y``) or both ``...s`` and ``...es``.  Returns None.
    """
    prov_dobav_katcom1(nscom, zapriz, katcomsp, vhs)
    # An empty name has no last letter to inspect; indexing it used to raise
    # IndexError, so no plural forms are tried for it.
    if jaz != u'en' or not vhs:
        return
    if vhs.endswith(u'y'):
        plurals = (vhs[:-1] + u'ies',)
    else:
        plurals = (vhs + u's', vhs + u'es')
    for candidate in plurals:
        prov_dobav_katcom1(nscom, zapriz, katcomsp, candidate)
# shcom_v_katcom: derive Commons categories from Commons templates.
#   sht is a list of template objects (attributes used: neisp, arg).  For each
#   usable template the first argument, without its namespace prefix, is tried
#   as a category through prov_dobav_katcom1.  If that fails, the Commons page
#   named by the argument is fetched (one redirect is followed) and the
#   categories of that page are tried instead.
#   Matching category lines end up in the list katcomsp (filled by
#   prov_dobav_katcom1).  Returns None.
# NOTE(review): indentation was lost in this copy; the nesting of the two
# try/except blocks described here is the apparent one -- confirm against the
# original script.
def shcom_v_katcom(nscom,zapriz,katcomsp,sht):
vivod(u' shcom_v_katcom0 %d\n'%len(sht))
for sh in sht:
if sh.neisp or len(sh.arg)<1:
continue
n=sh.arg[0]
vivod(u' shcom_v_katcom0 %s\n'%n)
# text after the first ':' (the whole string if iskat returns -1)
n2=n[iskat(n,u':')+1:]
usp=prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
if usp:
continue
sico=wikipedia.getSite(code = u'commons',fam = u'commons')
pli = wikipedia.Page(sico,n)
if pli.isCategory():
continue
try:
get1(pli)
ti = pli.get()
except wikipedia.NoPage:
b = u' - %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
continue
except wikipedia.IsRedirectPage:
# the Commons page is a redirect: try its target as a category first,
# then load the target page itself
n1=pli.getRedirectTarget()
b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),n1)
vivod(b)
n2=n1[iskat(n1,u':')+1:]
vivod(u' shcom_v_katcom %s\n'%n2)
usp=prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
if usp:
continue
pli = wikipedia.Page(pli.site(),pli.getRedirectTarget())
if pli.isCategory():
continue
try:
get1(pli)
ti = pli.get()
except wikipedia.NoPage:
b = u' - %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
continue
except wikipedia.IsRedirectPage:
# a second redirect is not followed; only its target name is tried
n1=pli.getRedirectTarget()
n2=n1[iskat(n1,u':')+1:]
vivod(u' shcom_v_katcom %s\n'%n2)
prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),pli.getRedirectTarget())
vivod(b)
continue
# dump the loaded Commons page into the second report file
otch2.write(u'======shcom_v_katcom ti======\n%s\n-=======\n%s\n===============================\n' % (pli.title(),ti))
otch2.flush()
# try every category of the Commons page
plikat = pli.categories()
vivod(u' shcom_v_katcom len plikat %d\n'%len(plikat))
for pka in plikat:
n1=pka.title()
vivod(u' shcom_v_katcom1 %s\n'%n1)
n2=n1[iskat(n1,u':')+1:]
prov_dobav_katcom1(nscom,zapriz,katcomsp,n2)
def shcom_prov_uzxe_povt(shcomi, shcomo):
    """Flag the Commons templates in *shcomo* that must not be added.

    A template gets ``neisp = 1`` when ``sravn_com`` reports it equal to a
    template in *shcomi* (the ones already present) or to an earlier,
    still usable entry of *shcomo* (a repetition).  Each decision is
    logged through ``vivod``.
    """
    for j, sh in enumerate(shcomo):
        vivod(u' %s\n' % sh.tekst())
        if sh.neisp:
            vivod(u' <-neisp shcom\n')
            continue

        already_present = False
        for present in shcomi:
            if not sh.sravn_com(present):
                already_present = True
                break
        if already_present:
            sh.neisp = 1
            vivod(u' <-uzxe shcom\n')
            continue

        for earlier in shcomo[:j]:
            if (not earlier.neisp) and (not sh.sravn_com(earlier)):
                sh.neisp = 1
                vivod(u' <-povt shcom\n')
                break
def put_prov(pl, nt, komm, me):
    """Save *nt* as the new text of the page *pl*.

    *komm* is the edit summary and *me* the minor-edit flag; both are
    passed to ``pl.put``.  Exceptions raised by ``pl.put`` propagate to
    the caller.  Returns None.

    The function used to continue with a "save, re-read and retry up to
    three times" loop placed after an unconditional ``return``.  That code
    could never run and has been removed.
    """
    pl.put(nt, comment = komm, minorEdit = me)
def vibor_iwi(tpl):
    """Pick at most eight interwiki pages from *tpl*, preferred languages first.

    Pages are ordered by the rank of their language in a fixed priority
    table (languages not in the table rank last, at 1000); pages of equal
    rank keep their input order.  Returns the selected pages as a new
    list.  Each step is logged through ``vivod``.
    """
    nmax = 8
    tpri = {u'en': 0, u'de': 1, u'ja': 2, u'fr': 3, u'pl': 4, u'nl': 5, u'it': 6,
            u'es': 7, u'sv': 8, u'pt': 9, u'zh': 10, u'he': 11, u'ru': 12,
            u'no': 13, u'fi': 14, u'cs': 15, u'da': 16, u'hu': 17, u'eo': 18,
            u'ca': 19, u'bg': 20, u'sl': 21, u'sk': 22, u'et': 23, u'sr': 24,
            u'id': 25, u'ro': 26, u'nn': 27, u'ko': 28, u'gl': 29, u'hr': 30,
            u'uk': 31, u'lt': 32, u'ms': 33, u'vi': 34, u'tr': 35, u'th': 36,
            u'io': 37, u'el': 38, u'ar': 39, u'is': 40, u'lb': 41, u'tt': 42,
            u'simple': 43, u'bs': 44, u'ka': 45, u'fa': 46, u'sh': 47, u'af': 48,
            u'br': 49, u'la': 50,}
    ranked = []
    for i, page in enumerate(tpl):
        jaz = page.site().lang
        # dict.get replaces the deprecated has_key() + lookup pair
        z = tpri.get(jaz, 1000)
        ranked.append((z, i))
        vivod(u'vibor_iwi1: z=%d jaz=%s\n' % (z, jaz))
    # sorting (rank, index) pairs keeps the input order among equal ranks
    ranked.sort()
    ntpl = []
    for z, i in ranked:
        ntpl.append(tpl[i])
        vivod(u'vibor_iwi2: z=%d jaz=%s\n' % (z, tpl[i].site().lang))
        if len(ntpl) >= nmax:
            vivod(u'vibor_iwi: otbr!\n')
            break
    return ntpl
#def rabota(slov,nscom,nssvoj,zapriz,tbl_pl,plprov,fproviz,statis):
# si0=plprov.site()
# pl=wikipedia.Page(si0, plprov.title())
# vivod(u'%s\n'%pl.title())
# try:
# plprov=get_tbl(tbl_pl,plprov)
# ot = plprov.get()
# except wikipedia.NoPage:
# b = u'- %s\n' % plprov.title()
# vivod(b)
# return
# except wikipedia.IsRedirectPage:
# b = u'+ %s\n-> %s\n' % (plprov.title(), plprov.getRedirectTarget())
# vivod(b)
# return
# if len(plprov.interwiki())<1:
# vivod(u'-iwi\n')
# return
# rabota: process one article `pl` -- gather material (Commons templates,
# images, categories, ...) from its interwiki counterparts and, when something
# new was found, save the extended article text.
#
# Parameters, as they are used in this body:
#   slov    -- handed to perevod_ob / perevod.perevod_iwi / perev_kateg
#   nscom   -- mapping probed for u'Category:...' keys by prov_dobav_katcom*;
#              also handed to imagetransfer1.ImageTransferBot
#   nssvoj  -- handed to perev_kateg
#   zapriz  -- mapping of forbidden items; image names, link addresses and
#              category names that are keys of it are skipped
#   tbl_pl  -- page buffer used with get_tbl / sozd_buf_tbl
#   pl      -- the article to work on
#   fproviz -- when true the article itself is not edited; instead the
#              category text is written to the Commons pages of the
#              article's own images
#   statis  -- Statis counters, updated when the article is saved
# Returns None.  Reports are written to the module-level files otch / otch2.
#
# NOTE(review): indentation was lost in this copy of the script.  The comments
# below describe the sequence of statements; the exact nesting has to be
# confirmed against the original.
def rabota(slov,nscom,nssvoj,zapriz,tbl_pl,pl,fproviz,statis):
vivod(u'%s\n'%pl.title())
si0=pl.site()
# --- load the article; missing pages and redirects are only logged ---
try:
ot = pl.get()
except wikipedia.NoPage:
b = u'-2 %s\n' % pl.title()
vivod(b)
return
except wikipedia.IsRedirectPage:
b = u'+2 %s\n-> %s\n' % (pl.title(),pl.getRedirectTarget())
vivod(b)
return
otch.write(u'+ %s\n' % pl.title())
otch.flush()
otch2.write(u'%s\n-=======\n%s\n========\n' % (pl.title(),ot))
otch2.flush()
# --- interwiki links to other sites, one page per site; none -> nothing to do ---
oiwi = pl.interwiki()
niwi = {}
for pl2 in oiwi:
if pl.site()!=pl2.site():
niwi[pl2.site()] = pl2
if len(niwi)<1:
vivod(u'-iwi2\n')
return
plkat = pl.categories()
# nt is the working copy of the text: language links removed, no '\r', no
# blank before a newline, and a newline at the very end
nt = wikipedia.removeLanguageLinks(ot)
# nt = wikipedia.removeCategoryLinks(nt,pl.site())
nt = nt.replace(u'\r',u'')
while u' \n' in nt:
nt = nt.replace(u' \n',u'\n')
if len(nt)<1 or nt[len(nt)-1]!=u'\n':
nt+=u'\n'
# fzam becomes 1 as soon as something is to be added to the article
fzam=0
jaz0=pl.site().lang
ntbk=ubrkoment(nt)
#!!!!!!!!!
# shapam=pl.site().family.disambig(jaz0)
# the template list is hard-coded here; articles carrying one of these
# templates are skipped (presumably disambiguation pages)
shapam=[u'apartigilo']
fapa=0
for shapa in shapam:
if (iskats(ot,0,u'{{'+shapa[0].lower()+shapa[1:]+'}}')!=-1 or
iskats(ot,0,u'{{'+shapa[0].upper()+shapa[1:]+'}}')!=-1):
fapa=1
break
if fapa:
b = u'apa %s\n' % pl.title()
vivod(b)
return
# --- what the article already has (names ending in i / 0) ---
res0=image_podp(pl.site(),ntbk)
vivod(u'len(res0)=%d\n'%len(res0))
kati=[]
for pka in plkat:
kati.append(Kateg(jaz0,pka.title(),1))
nomio=[(pl.title(),jaz0)]
shcomi=[]
eksli=[]
ssilki=[]
shcomi=isk_shabl(jaz0,ntbk,(u'Commons',u'Commonscat'))
for sh in shcomi:
korr_com_shabl(sh)
if not fproviz:
# eksli=isk_eksl(jaz0,ntbk)
ssilki=isk_ssilk(pl.site(),ntbk)
# --- candidates collected from the other languages (names ending in o) ---
reso=[]
shcomo=[]
ekslo=[]
ssilko=[]
tablo=[]
kato=[]
itbot = imagetransfer1.ImageTransferBot(
targetSite = wikipedia.getSite(),otch=otch,opis=otch2,
otchnc=otchnc,nscom=nscom,otchkc=otchkc,otchup=otchup)
itbot.tbl_pl=tbl_pl
# for j in range(len(res0)):
# res0[j].tip=10
# iz=res0[j].iz
# b2=res0[j].podp
# b2=b2.replace(u'\n',u' ')
# b = u' b0 %s %s\n' % (iz,b2)
# vivod(b)
# itbot.zagruz_info(res0,1)
# if fproviz:
# fnajd=0
# itbot.prov_nekatco(res0)
# for j in range(len(res0)):
# if res0[j].nekatco:
# fnajd=1
# if not fnajd:
# return
# --- visit the interwiki pages selected by vibor_iwi ---
niwisp=[]
for pli in niwi.itervalues():
niwisp.append(pli)
for pli in vibor_iwi(niwisp):
# if pli.site().lang!=u'en' and pli.site().lang!=u'hr':
# continue
try:
# get1(pli)
pli=get_tbl(tbl_pl,pli)
ti = pli.get()
except wikipedia.NoPage:
b = u' - %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
continue
except wikipedia.IsRedirectPage:
# one redirect is followed; a second one makes the page be skipped
b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),pli.getRedirectTarget())
vivod(b)
pli = wikipedia.Page(pli.site(),pli.getRedirectTarget())
try:
# get1(pli)
pli=get_tbl(tbl_pl,pli)
ti = pli.get()
except wikipedia.NoPage:
b = u' - %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
continue
except wikipedia.IsRedirectPage:
b = u' + %s:%s\n -> %s\n' % (pli.site().lang,pli.title(),pli.getRedirectTarget())
vivod(b)
continue
# category and image pages are not used as sources
if pli.isCategory():
b = u' +kat %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
continue
if pli.isImage():
b = u' +bil %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
continue
b = u' + %s:%s\n' % (pli.site().lang,pli.title())
vivod(b)
ti=ubrkoment(ti)
ti = ti.replace(u'\r',u'')
# NOTE(review): as written, the next loop replaces a single space by a single
# space and never terminates once `ti` contains a space.  Presumably the first
# literal held two spaces before the whitespace of this copy was collapsed --
# restore from the original script.
while u' ' in ti:
ti = ti.replace(u' ',u' ')
while u' \n' in ti:
ti = ti.replace(u' \n',u'\n')
# position of the "see also" section; p1/p2 are only used by the
# commented-out link collection below
(p0,p1)=isk_zagolov(ti,pli.site().lang,t_zag_vian)
p2=isk_kon_zagolov(ti,p0,p1)
plikat = pli.categories()
for pka in plikat:
kato.append(Kateg(pli.site().lang,pka.title(),1))
nomio.append( (pli.title(),pli.site().lang) )
shcoma=isk_shabl(pli.site().lang,ti,(u'Commons',u'Commonscat'))
for sh in shcoma:
korr_com_shabl(sh)
shcomo.append(sh)
if not fproviz:
# images are taken over only when enabled and the article has none itself
if gl_dobav_iz and len(res0)==0:
res1=image_podp(pli.site(),ti)
for iz in res1:
reso.append(iz)
# eksla=isk_eksl(pli.site().lang,ti)
# for eksl in eksla:
# ekslo.append(eksl)
# ssilka=isk_ssilk_smt(pli.site(),ti[p1:p2])
# for ssilk in ssilka:
# ssilko.append(ssilk)
# tabla=isk_tabl(pli.site().lang,ti)
# for tabl in tabla:
# tablo.append(tabl)
pass
# --- counters of new items, and text for the additions sub-page (ntdi) ---
nnov_shcom=0
nnov_eksl=0
nnov_ssilk=0
nnov_tabl=0
nnov_kat=0
ntdi=u''
nnovdi_eksl=0
sico=wikipedia.getSite(code = u'commons',fam = u'commons')
# --- Commons templates: drop known/repeated ones, check the Commons pages ---
nt_shcom=u''
shcom_prov_uzxe_povt(shcomi,shcomo)
if len(shcomo)>0:
nkpshpl=[]
for sh in shcomo+shcomi:
if sh.neisp:
continue
sh.pl=wikipedia.Page(sico,sh.arg[0])
nkpshpl.append(sh.pl)
sozd_buf_tbl(tbl_pl,nkpshpl,otch)
for sh in shcomo+shcomi:
if sh.neisp:
continue
try:
sh.pl=get_tbl(tbl_pl,sh.pl)
sh.pl.get()
b = u' + shcom:%s\n' % sh.pl.title()
vivod(b)
sh.arg[0]=sh.pl.title()
except wikipedia.NoPage:
b = u' - shcom:%s\n' % sh.pl.title()
vivod(b)
sh.neisp=1
except wikipedia.IsRedirectPage:
b = u' + shcom:%s\n -> %s\n' % (sh.pl.title(),
sh.pl.getRedirectTarget())
vivod(b)
# a redirected Commons page is replaced by its target
sh.arg[0]=sh.pl.getRedirectTarget()
# second pass, now with the corrected first arguments
shcom_prov_uzxe_povt(shcomi,shcomo)
for j in range(len(shcomo)):
if shcomo[j].neisp:
continue
sh_t=shcomo[j].tekstn(1)
nt_shcom+= u'%s\n'%sh_t
nnov_shcom+=1
fzam=1
b = u' !+ shcom:%s\n' % sh_t
vivod(b)
if nnov_shcom:
nt_shcom= u'<br clear=all>\n'+nt_shcom
# --- tables: `tablo` is never filled here (its collection is commented out
# above), so this section currently adds nothing ---
nt_tabl=u''
for j in range(len(tablo)):
ta=tablo[j]
tat=ta.tekst().replace(u'\n',u' ')
vivod(u' tabl: %s\n'%tat)
if prov_tekst_uzxe(nt,ta.tekst()):
vivod(u' <-uzxe tabl\n')
continue
fpovt=0
for k in range(j):
if not ta.sravn(tablo[k]):
fpovt=1
break
if fpovt:
vivod(u' <-povt tabl\n')
continue
tap=perevod_ob(slov,ta.jaz,jaz0,ta.tekst()).t
nt_tabl+= u'<!-- %s -->\n'%tap
nnov_tabl+=1
fzam=1
# --- external links: `ekslo` and `eksli` are never filled here (their
# collection is commented out above), so this section currently adds nothing ---
for j in range(len(ekslo)):
eksl=ekslo[j]
ekslo[j].perpodp=perevod_ob(slov,eksl.jaz,jaz0,eksl.podp)
ekslo[j].perpodp2=Perev_podp(u'')
nt_eksl=u''
ntdi_eksl=u''
for j in range(len(ekslo)):
ekslo[j].tip=0
eksl=ekslo[j]
perpodp=eksl.perpodp
njazisp=eksl.njazisp
# vivod(u' %s\n'%eksl.tekst())
vivod(u' %s %d %s\n'%(eksl.jaz,eksl.njazisp,eksl.adr))
fuzxe=0
for eksl0 in eksli:
if not eksl.sravn(eksl0):
fuzxe=1
break
if fuzxe:
ekslo[j].tip=80
vivod(u' <-uzxe eksl\n')
continue
if zapriz.has_key(eksl.adr):
ekslo[j].tip=80
vivod(u' <-zapr\n')
continue
# repetitions: the first occurrence survives and absorbs the better
# caption and the language count of the later ones
fpovt=0
for k in range(len(ekslo)):
if (k!=j) and not eksl.sravn(ekslo[k]):
if k<j:
fpovt=1
break
perpodp1=ekslo[k].perpodp
perpodp=objed_podp(perpodp,perpodp1)
njazisp+=ekslo[k].njazisp
if fpovt:
ekslo[j].tip=80
vivod(u' <-povt eksl\n')
continue
ekslo[j].perpodp2=perpodp
ekslo[j].njazisp2=njazisp
for j in range(len(ekslo)):
ekslo[j].perpodp=ekslo[j].perpodp2
ekslo[j].njazisp=ekslo[j].njazisp2
if ekslo[j].tip<80:
ekslo[j].prioritet=ekslo[j].njazisp
ekslo.sort(key=Eksl.vz_prioritet,reverse=True)
maksn_eksl=4
for j in range(len(ekslo)):
if ekslo[j].tip>=80:
continue
if ekslo[j].perpodp.t!=u'':
ekslo[j].perpodp.t=u'<!-- '+ekslo[j].perpodp.t+u' -->'
nt1= u'{{el}} %s\n'%ekslo[j].pertekst()
if j+len(eksli)<maksn_eksl:
nt_eksl+=nt1
nnov_eksl+=1
fzam=1
else:
# ntdi_eksl+=nt1
# nnovdi_eksl+=1
pass
# --- "see also" links: `ssilko` is never filled here (its collection is
# commented out above), so this section currently adds nothing ---
for j in range(len(ssilko)):
ss=ssilko[j]
pe=perevod.perevod_iwi(slov,ss.jaz,jaz0,ss.ss)
vivod(u' ss %s:%s -> %s\n'%(ss.jaz,ss.ss,pe))
ssilko[j].ss=pe
nt_ssilk=u''
for j in range(len(ssilko)):
ss=ssilko[j]
if ss.ss==u'':
continue
vivod(u' %s\n'%ss.ss)
fuzxe=0
for ss0 in ssilki:
if iskats(ss.ss,0,u'[['+ss0.ss+u']]')!=-1:
fuzxe=1
break
if fuzxe:
vivod(u' <-uzxe ssilk\n')
continue
fpovt=0
for k in range(j):
if not ss.sravn(ssilko[k]):
fpovt=1
break
if fpovt:
vivod(u' <-povt ssilk\n')
continue
nt_ssilk+= u'* %s\n'%ss.ss
nnov_ssilk+=1
fzam=1
# --- images, pass 1: reject by file name, translate the captions ---
for j in range(len(reso)):
reso[j].perpodp2=Perev_podp(u'')
if reso[j].tip>=80:
continue
iz=reso[j].iz
# names starting with "NNpx-" / "NNNpx-" are rejected (tip 90)
if ((len(iz)>=6 and iz[0:2].isdigit() and iz[2:5]==u'px-') or
(len(iz)>=7 and iz[0:3].isdigit() and iz[3:6]==u'px-')):
reso[j].tip=90
vivod(u' - px- %s\n'%iz)
continue
# names (without extension) consisting mostly of digits are rejected too
ik=iskato(iz,len(iz)-1,u'.')
if ik==-1:
ik=len(iz)
izi=iz[:ik]
nalf=0
ncif=0
for c in izi:
if c.isdigit():
ncif+=1
# if c.isalpha():
else:
nalf+=1
if ncif>=4 and ncif>=nalf-2:
reso[j].tip=80
vivod(u' - cif %s\n'%iz)
continue
podp=reso[j].podp
jaz=reso[j].jaz
podp=podp.replace(u'\n',u' ')
# NOTE(review): same problem as above -- as written this loop never terminates
# once `podp` contains a space; presumably the first literal held two spaces.
while u' ' in podp:
podp = podp.replace(u' ',u' ')
if podp==u' ':
podp=u''
perpodp=perevod_ob(slov,jaz,jaz0,podp)
reso[j].perpodp=perpodp
# --- images, pass 2: forbidden / already present / repeated by name ---
for j in range(len(reso)):
reso[j].tip2=reso[j].tip
if reso[j].tip>=80:
continue
iz=reso[j].iz
perpodp=reso[j].perpodp
jaz=reso[j].jaz
njazisp=reso[j].njazisp
# NOTE(review): `podp` is not assigned in this loop; the value logged here is
# whatever the previous loop left behind (or undefined if that loop never
# reached its assignment).  Presumably reso[j].podp was meant -- confirm.
b = u' b %s:%s %s\n' % (jaz,iz,podp)
vivod(b)
if zapriz.has_key(iz):
reso[j].tip2=80
vivod(u' <-zapr\n')
continue
fuzxe=0
for k in range(len(res0)):
if res0[k].tip<80 and res0[k].iz==iz:
fuzxe=1
break
if fuzxe:
reso[j].tip2=80
vivod(u' <-uzxe\n')
continue
fpovt=0
k=0
while k<len(reso):
if reso[k].tip<80 and (k!=j) and (reso[k].iz==iz):
if k<j:
fpovt=1
break
perpodp1=reso[k].perpodp
perpodp=objed_podp(perpodp,perpodp1)
njazisp+=reso[k].njazisp
if perpodp1.t!=u'':
b = u' %s\n' % (perpodp1.t)
vivod(b)
k+=1
if fpovt:
reso[j].tip2=80
vivod(u' <-povt\n')
continue
reso[j].perpodp2=perpodp
reso[j].njazisp2=njazisp
# the results of the pass become effective only now, so that all comparisons
# above saw the state from before the pass
for j in range(len(reso)):
reso[j].tip=reso[j].tip2
reso[j].perpodp=reso[j].perpodp2
reso[j].njazisp=reso[j].njazisp2
# let the image bot look the candidates up and load their data (the `dl`
# attribute compared below is presumably set there -- confirm in
# zimagetransfer1)
itbot.iskat(reso)
itbot.zagruz_info(reso,1)
kprefi=pl.site().family.image_namespace(pl.site().lang)
# --- images, pass 3: reject flags, compare by name or by `dl` ---
for j in range(len(reso)):
reso[j].tip2=reso[j].tip
if reso[j].tip>=80:
continue
iz=reso[j].iz
if (
#reso[j].tip>=50 and
(iskat(iz,u'flag')!=-1 or iskat(iz,u'Flag')!=-1 or
iskat(iz,u'FLAG')!=-1 or
iskat(iz,u'bandiera')!=-1 or
iskat(iz,u'Bandiera')!=-1 or
iskat(iz,u'BANDIERA')!=-1)):
reso[j].tip2=80
vivod(u' - flag %s\n'%iz)
continue
perpodp=reso[j].perpodp
njazisp=reso[j].njazisp
fuzxe=0
for k in range(len(res0)):
if res0[k].tip<80 and (res0[k].iz==iz or res0[k].dl==reso[j].dl):
fuzxe=1
break
if fuzxe:
reso[j].tip2=80
vivod(u' <-uzxe2\n')
continue
fpovt=0
for k in range(len(reso)):
if (reso[k].tip<80 and (k!=j) and
(reso[k].iz==iz or reso[k].dl==reso[j].dl)):
# of two equal images the one with the lower tip wins, on a tie the earlier one
if reso[k].tip<reso[j].tip or (reso[k].tip==reso[j].tip and k<j):
fpovt=1
break
if reso[k].iz==iz:
perpodp1=reso[k].perpodp
perpodp=objed_podp(perpodp,perpodp1)
njazisp+=reso[k].njazisp
if fpovt:
reso[j].tip2=80
vivod(u' <-povt2\n')
continue
reso[j].perpodp2=perpodp
reso[j].njazisp2=njazisp
for j in range(len(reso)):
reso[j].tip=reso[j].tip2
reso[j].perpodp=reso[j].perpodp2
reso[j].njazisp=reso[j].njazisp2
if reso[j].tip<80:
# more languages first, then lower tip
reso[j].prioritet=reso[j].njazisp*1000+1000-reso[j].tip
reso.sort(key=imagetransfer1.Izobr.vz_prioritet,reverse=True)
for j in range(len(reso)):
vivod(u' [[%s:%s]] dl=%d nj=%d tip=%d\n'%
(kprefi,reso[j].iz,reso[j].dl,reso[j].njazisp,reso[j].tip))
# at most maksn_iz images in the article (own ones included); surplus
# candidates with tip >= 50 are discarded (tip 90)
maksn_iz=4
jn=0
for j in range(len(reso)):
if reso[j].tip>=80:
continue
if reso[j].tip>=50 and jn+len(res0)>=maksn_iz:
reso[j].tip=90
vivod(u'-otbr %s\n'%reso[j].iz)
jn+=1
# --- Commons category text, needed when a candidate has tip 50 or in
# fproviz mode ---
flkatcom=0
for j in range(len(reso)):
if reso[j].tip==50:
flkatcom=1
itbot.fkatcom_ns=0
if flkatcom or fproviz:
katcomsp=[]
katprob=[]
# attempts in order: article titles, Commons templates, category names,
# single words (4+ characters) of all names tried so far
for (n,jaz) in nomio:
katprob.append( (n,jaz) )
prov_dobav_katcom(nscom,zapriz,katcomsp,n,jaz)
if len(katcomsp)<1:
shcom_v_katcom(nscom,zapriz,katcomsp,shcomi+shcomo)
if len(katcomsp)<1:
for ka in kati+kato:
katprob.append( (ka.n,ka.jaz) )
prov_dobav_katcom(nscom,zapriz,katcomsp,ka.n,ka.jaz)
if len(katcomsp)<1:
vivod(u'- katcom\n')
for (n1,jaz) in katprob:
n2=n1.replace(u'(',u' ').replace(u')',u' ')
for n in n2.split(u' '):
if len(n)>=4:
prov_dobav_katcom(nscom,zapriz,katcomsp,n,jaz)
if len(katcomsp)<1:
# nothing matched: fall back to the tried names themselves and mark the
# result with fkatcom_ns
vivod(u'- katcom2\n')
itbot.fkatcom_ns=1
for (n,jaz) in katprob:
t=u'Category:'+n
t1=u'[['+t+u']]\n'
if (not t1 in katcomsp) and (not zapriz.has_key(n)):
katcomsp.append(t1)
katcom=u''
for t in katcomsp:
katcom+=t
itbot.katcom=katcom
vivod(u'========katcom=\n%s==========\n' % katcom)
if itbot.fkatcom_ns:
for j in range(len(reso)):
if reso[j].tip==50:
reso[j].tip=60
# ! main step: copying the images (disabled)
# if gl_dobav_iz and (not fproviz):
# itbot.kopir_izobr(reso)
# --- wikitext for the images: the first ones go into the article (nt_iz),
# the rest onto the additions sub-page (ntdi_iz) ---
nnov_iz=0
nnovdi_iz=0
jn=0
nt_iz=u''
ntdi_iz=u''
nt_izga=u'<gallery>\n'
ntdi_izga=u'<gallery>\n'
for j in range(len(reso)):
if reso[j].tip>=80:
continue
t=u''
if reso[j].perpodp.t!=u'':
t=u'|<!-- %s -->' % reso[j].perpodp.t
nt1= u'[[%s:%s|thumb|left|180px%s]]\n'%(kprefi,reso[j].iz,t)
nt2=(u'%s:%s|<!-- thumb|left|180px|%s -->\n'%
(kprefi,reso[j].iz,reso[j].perpodp.t))
if jn+len(res0)<maksn_iz:
nt_iz+=nt1
nt_izga+=nt2
nnov_iz+=1
fzam=1
else:
ntdi_iz+=nt1
ntdi_izga+=nt2
nnovdi_iz+=1
jn+=1
nt_izga+=u'</gallery>\n'
ntdi_izga+=u'</gallery>\n'
# more than two images are shown as a gallery instead of thumbnails
if nnov_iz>2:
nt_iz=nt_izga
if nnovdi_iz>2:
ntdi_iz=ntdi_izga
if nnov_iz:
nt_iz= u'<br clear=all>\n'+nt_iz
# --- assemble the new article text ---
# flbr: a <br clear=all> is still needed before the next new heading
flbr=0
nt+=nt_tabl
nt+=nt_iz
if nt_iz!=u'':
flbr=1
if len(nt_ssilk)>5:
(pzssilk0,pzssilk1)=isk_zagolov(nt,jaz0,t_zag_vian)
if pzssilk0==pzssilk1:
# no "see also" section yet: create one at the end
if flbr:
nt+=u'<br clear=all>\n'
flbr=0
nt+=u'\n== '+gl_zagolov(jaz0,t_zag_vian)+u' ==\n'
pzssilk1=len(nt)
nt=nt[:pzssilk1]+nt_ssilk+nt[pzssilk1:]
nt+=nt_shcom
if nt_shcom!=u'':
flbr=1
if len(nt_eksl)>5:
(pzeksl0,pzeksl1)=isk_zagolov(nt,jaz0,t_zag_eksl)
if pzeksl0==pzeksl1:
if flbr:
nt+=u'<br clear=all>\n'
flbr=0
nt+=u'\n== '+gl_zagolov(jaz0,t_zag_eksl)+u' ==\n'
pzeksl1=len(nt)
nt=nt[:pzeksl1]+nt_eksl+nt[pzeksl1:]
# pointer templates to the additions sub-page, plus the sub-page sections
if nnovdi_iz>0:
nt_shalka=u'{{%s 2}}'%wikipedia.translate(si0,te_al_bildoj)
if iskat(nt,nt_shalka)==-1:
nt+=nt_shalka+u'\n'
ntdi+=u'\n== %s ==\n'%wikipedia.translate(si0,te_al_bildoj)+ntdi_iz
fzam=1
if nnovdi_eksl>0:
nt_shalel=u'{{%s 2}}'%wikipedia.translate(si0,te_al_ekslj)
if iskat(nt,nt_shalel)==-1:
nt+=nt_shalel+u'\n'
ntdi+=u'\n== %s ==\n'%wikipedia.translate(si0,te_al_ekslj)+ntdi_eksl
fzam=1
# categories are proposed only for articles that have none
nt_kat_sp=[]
glb=0
if len(kati)==0:
# nt_kat_sp=perev_kateg(slov,nssvoj,pl.site(),kato,1,otch)
nt_kat_sp=perev_kateg(slov,nssvoj,pl.site(),kato,0,otch)
nnov_kat=len(nt_kat_sp)
if nnov_kat>0:
fzam=1
nt_kat=kateg_v_tekst(nt_kat_sp)
nt+=nt_kat
# --- fproviz mode: write the category text to the Commons pages of the
# article's own images that are flagged `nekatco` ---
if fproviz:
for j in range(len(res0)):
# if res0[j].tip>=80 or res0[j].nekatco:
if res0[j].nekatco:
tc=res0[j].plcot+u'\n\n'+katcom
otch2.write(u'%s\n+=======\n%s\n====================================\n' % (res0[j].plco.title(),tc))
otch2.flush()
res0[j].plco.put(tc,comment = u'', minorEdit = False)
vivod(u'++ kat -> %s\n' % res0[j].plco.title())
# NOTE(review): `fnajd` is assigned only inside commented-out code above, so
# this test raises NameError if it is ever reached -- confirm against the
# original and either restore the assignment or drop the test.
if not fnajd:
return
# --- save the article ---
if (not fproviz) and fzam:
if nt[len(nt)-1:]==u'\n':
nt=nt[:len(nt)-1]
# nt_stkat=wikipedia.replaceCategoryLinks(u'', ocat, pl.site())
# nt+=wikipedia.replaceLanguageLinks(nt_stkat, niwi)
# the language links removed at the start are written back at the end
nt+=u'\n'+wikipedia.replaceLanguageLinks(u'', niwi)
otch2.write(u'%s\n+=======\n%s\n====================================\n' % (pl.title(),nt))
otch2.flush()
statis.ob+=1
# edit summary: one "+ <count> <noun>" part per kind of addition
t = u"AAld:"
if nnov_iz>0:
t+=u' + %d %s'%(nnov_iz,wikipedia.translate(si0,te_bildoj))
statis.iz+=1
if nnov_shcom>0:
t+=u' + %d %s'%(nnov_shcom,wikipedia.translate(si0,te_komunejoj))
statis.shcom+=1
if nnov_eksl>0:
t+=u' + %d %s'%(nnov_eksl,wikipedia.translate(si0,te_ekslj))
statis.eksl+=1
if nnov_ssilk>0:
t+=u' + %d %s'%(nnov_ssilk,wikipedia.translate(si0,te_ligoj))
statis.ssilk+=1
if nnov_tabl>0:
t+=u' + %d %s'%(nnov_tabl,wikipedia.translate(si0,te_tabeloj))
statis.tabl+=1
if nnov_kat>0:
t+=u' + %d %s'%(nnov_kat,wikipedia.translate(si0,te_kategorioj))
statis.kat+=1
if nnovdi_iz>0 or nnovdi_eksl>0:
t+=u' + (%s)'%wikipedia.translate(si0,te_vi_di)
vivod(t+u'\n')
try:
put_prov(pl,nt,t,False)
pass
except wikipedia.EditConflict:
vivod('!!!EditConflict!!!\n')
except wikipedia.LockedPage:
vivod('!!!LockedPage!!!\n')
vivod((u'statis: izm %d sta s iz %d shcom %d eksl %d ssilk %d '+
u'tabl %d kat %d\n')%(statis.ob,statis.iz,statis.shcom,
statis.eksl,
statis.ssilk,statis.tabl,statis.kat))
# --- save the surplus material on the sub-page "<title>/<additions>" ---
if (not fproviz) and (nnovdi_iz>0 or nnovdi_eksl>0):
# diprefi=pl.site().family.namespace(pl.site().lang, 1, '_default')
# pldi=wikipedia.Page(pl.site(),diprefi+u':'+pl.title())
pldi=wikipedia.Page(pl.site(),pl.title()+u'/'+
wikipedia.translate(si0,te_aldon))
try:
tdi = pldi.get()
vivod(u'+ %s\n' % pldi.title())
except wikipedia.NoPage:
tdi=u''
vivod(u'- %s\n' % pldi.title())
except wikipedia.IsRedirectPage:
tdi=u'--> [['+pldi.getRedirectTarget()+u']]\n----\n'
vivod(u'+ %s\n-> %s\n' % (pldi.title(),pldi.getRedirectTarget()))
ntdi+=u'\n[[%s]]\n'%wikipedia.translate(si0,te_aldon_kat)
# the new material is appended to whatever the sub-page already contains
tdi+=u'\n'+ntdi
otch2.write(u'%s\n+=======\n%s\n====================================\n' % (pldi.title(),ntdi))
otch2.flush()
# t = u"AAld (%s):"%wikipedia.translate(si0,te_diskuto)
t = u"AAld (%s):"%(wikipedia.translate(si0,te_aldon).lower())
if nnovdi_iz>0:
t+=u' + %d %s'%(nnovdi_iz,wikipedia.translate(si0,te_bildoj))
if nnovdi_eksl>0:
t+=u' + %d %s'%(nnovdi_eksl,wikipedia.translate(si0,te_ekslj))
vivod(t+u'\n')
try:
pldi.put(tdi, comment = t, minorEdit = False)
except wikipedia.EditConflict:
vivod('!!!EditConflict!!!\n')
except wikipedia.LockedPage:
vivod('!!!LockedPage!!!\n')
return
def zapis_fimen(fimen, nomerf, pl):
    """Append the progress line ``"<nomerf> <page title>"`` to the file
    *fimen* and flush it."""
    line = u'%d %s\n' % (nomerf, pl.title())
    fimen.write(line)
    fimen.flush()
def provstop(fstop):
    """Return True when the path *fstop* exists (``os.access`` with ``F_OK``)."""
    exists = os.access(fstop, os.F_OK)
    return exists
# ivsenmain: main driver of the bot.
#   Reads the state file `<filename>.txt` (module-level `filename`): its first
#   line names the dictionary to load (an empty first line switches on
#   fproviz), and the last non-empty later line has the form
#   "<run number> <title>" -- the run number is increased by one and the title
#   is where the page listing starts.
#   Opens five report files for the run and processes all pages from that
#   title on in batches of up to 64 with rabota(), appending a progress line
#   to the state file after every page.
#   The run ends when the page generator is exhausted, when a title starts
#   with the module-level `poslim`, or when the file 'ivsenstop.txt' shows up.
# NOTE(review): indentation was lost in this copy; the comments describe the
# statement sequence, the exact nesting must be confirmed against the original.
def ivsenmain():
fstop='ivsenstop.txt'
# remove a stop file left over from an earlier run; any error is ignored
# (bare except)
try:
os.unlink(fstop)
except:
pass
if provstop(fstop):
wikipedia.output(u'ivsenstop0!\n')
return
n=u''
skon=u''
nomerf=-1
f=codecs.open(filename+'.txt','rb',encoding='utf-8')
frl=f.readlines()
mysite = wikipedia.getSite()
slov={}
nscom={}
nssvoj={}
zapriz={}
zaprobr={}
fproviz=0
i=0
j=0
fl1=1
# --- parse the state file ---
for s in frl:
# wikipedia.output(u'%d\n' % ord(s[0]))
# 65279 == U+FEFF, a byte order mark at the start of the line
if ord(s[0]) == 65279:
s=s[1:]
# wikipedia.output(u'%d %d %s %s\n' % (i,j,n,s))
s=s.replace(u'\r',u'')
if s[len(s)-1]==u'\n':
s=s[:len(s)-1]
# fl1 marks the first line
if fl1:
if s==u'':
fproviz=1
else:
slovdop={}
perevod.zagruzslov(slov,slovdop,nssvoj,u'',mysite.lang,s)
# extra tables delivered by the dictionary loader:
# 'c' -> nscom, 'z' -> zapriz, 'zo' -> zaprobr
if slovdop.has_key(u'c'):
nscom=slovdop[u'c']
if slovdop.has_key(u'z'):
zapriz=slovdop[u'z']
if slovdop.has_key(u'zo'):
zaprobr=slovdop[u'zo']
fl1=0
else:
# remember the last non-empty line
if len(s)>=1:
skon=s
frl=[]
f.close()
# leading digits of that line -> run number; text after the first space ->
# start title (u'!' when there is no space)
nomerf=0
i=0
while (i<len(skon)) and provcifr(skon[i]):
nomerf=nomerf*10+ord(skon[i])-ord(u'0')
i+=1
i=iskat(skon,u' ')
if i!=-1:
nomsta=skon[i+1:]
else:
nomsta=u'!'
nomerf+=1
# --- report files of this run; module-level so that vivod/rabota can use them ---
global otch,otch2,otchnc,otchkc,otchup
otch = codecs.open(u'%s_ot%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otch2 = codecs.open(u'%s_op%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otchnc = codecs.open(u'%s_nc%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otchkc = codecs.open(u'%s_kc%06d.txt' % (filename,nomerf), 'w', 'utf-8')
otchup = codecs.open(u'%s_up%06d.txt' % (filename,nomerf), 'w', 'utf-8')
fimen=codecs.open(filename+'.txt','a',encoding='utf-8')
otch.write(u'fproviz=%d\n\n'%fproviz)
otch.flush()
statis=Statis()
paggen=pagegenerators.AllpagesPageGenerator(nomsta, 0)
paggeni=paggen.__iter__()
flperv=1
nkp=[]
maxnkp=64
# --- batch loop ---
# for pla in paggen:
while 1:
flkon=0
# NOTE(review): the bare except treats ANY error from the generator as
# "no more pages", not only StopIteration
try:
pla=paggeni.next()
except:
flkon=1
# raise
if not flkon:
# the first page delivered is the one processed last in the previous run
if flperv==1:
vivod(u'-uzxe %s\n'%pla.title())
flperv=0
continue
if zaprobr.has_key(pla.title()):
vivod(u'-zaprobr %s\n'%pla.title())
continue
nkp.append(pla)
if poslim!=u'' and pla.title().startswith(poslim):
vivod(u'poslim!\n')
flkon=1
elif len(nkp)<maxnkp:
continue
# --- a batch is complete (or the input has ended): preload its pages ---
tbl={}
sozd_buf_tbl(tbl,nkp,otch)
nkpi=[]
tdliz=[]
for pl in nkp:
try:
pl=get_tbl(tbl,pl)
ot = pl.get()
pl_iz=image_podp(pl.site(),ubrkoment(ot))
vivod(u'%s len(pl_iz)=%d\n'%(pl.title(),len(pl_iz)))
# nkpi: interwiki pages to preload; tdliz: interwiki pages of articles
# that have no images of their own
for pli in vibor_iwi(pl.interwiki()):
nkpi.append(pli)
if len(pl_iz)==0:
tdliz.append(pli)
except wikipedia.NoPage:
pass
except wikipedia.IsRedirectPage:
pass
sozd_buf_tbl(tbl,nkpi,otch)
# --- optionally preload the image description pages as well ---
tiz=[]
if gl_dobav_iz:
for pl in tdliz:
try:
pl=get_tbl(tbl,pl)
ot = pl.get()
tiz+=image_podp(pl.site(),ubrkoment(ot))
except wikipedia.NoPage:
pass
except wikipedia.IsRedirectPage:
pass
nkpiz=[]
sico=wikipedia.getSite(code = u'commons',fam = u'commons')
msprefi=mysite.family.image_namespace(mysite.lang)
for izo in tiz:
ctit=u'Image:'+izo.iz
if nscom.has_key(ctit):
vivod(u'tc+ %s\n' % ctit)
else:
nkpiz.append(wikipedia.Page(sico,ctit))
sourceSite=wikipedia.getSite(izo.jaz,fam = mysite.family)
isprefi=sourceSite.family.image_namespace(izo.jaz)
nkpiz.append(wikipedia.Page(sourceSite,isprefi+u':'+izo.iz))
nkpiz.append(wikipedia.Page(mysite,msprefi+u':'+izo.iz))
sozd_buf_tbl(tbl,nkpiz,otch)
vivod(u'\n\n********************************\n\n\n')
# --- process the batch page by page, recording progress after each ---
for pl in nkp:
rabota(slov,nscom,nssvoj,zapriz,tbl,pl,fproviz,statis)
otch.write(u'\n\n\n')
otch.flush()
zapis_fimen(fimen,nomerf,pl)
vivod(u'\n\n********************************\n\n\n')
nkp=[]
if provstop(fstop):
wikipedia.output(u'ivsenstop!\n')
return
if flkon:
wikipedia.output(u'vsjo!\n')
return
# Command line: argv[1] is the base name of the state file
# ("<filename>.txt"); the optional argv[2] is a title prefix at which the run
# stops (underscores stand for spaces).
filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
if len(sys.argv) >= 3:
    poslim = wikipedia.argHandler(sys.argv[2], 'cht_cat').replace(u'_', u' ')
else:
    poslim = u''

# wikipedia.stopme() is called whether the run ends normally or with an
# exception; an exception is still propagated afterwards.
try:
    ivsenmain()
finally:
    wikipedia.stopme()
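# </nowiki>  -- stray wiki markup left over from the page this script was copied from; not part of the program.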