# From Wikipedia, the free encyclopedia
# -*- coding: UTF-8 -*-
__version__='$Id:'
import wikipedia
import re, sys
import codecs
def iskat(t,s):
    """Return the index of the first occurrence of *s* in *t*, or -1.

    t -- the text to search in.
    s -- the substring to look for.

    The previous hand-written scan stopped one position short, so a match
    that ended exactly at the end of *t* (including ``t == s``) was reported
    as not found.  ``find`` checks every valid start position.
    """
    return t.find(s)
def _sozd_log(b, echo=True):
    """Write *b* to the ``otch`` report file and flush it immediately.

    b    -- unicode text to record.
    echo -- when true, also pass *b* to ``wikipedia.output`` first.
    """
    if echo:
        wikipedia.output(b)
    otch.write(b)
    otch.flush()


def sozd(n,ta1,t):
    """Rewrite wiki page *n*: put ``{{ta1}}`` first and finish the text with *t*.

    NOTE(review): the listing this code was taken from had lost all of its
    indentation.  The nesting below -- in particular which statements belong
    to the ``if i != -1`` branch -- is a reconstruction; confirm it against
    the original script before running the bot.

    n   -- page title; passed through ``wikipedia.UnicodeToAsciiHtml``.
    ta1 -- template name without the surrounding braces.
    t   -- text that replaces everything from the u'Vidu anka' marker
           onwards, or is appended when the marker is not present.

    Reads the module globals ``mysite``, ``otch`` and ``otch2``.  Progress is
    written to ``otch``; the page text before and after the edit is written
    to ``otch2``.  The result is saved with ``pl.put``.  Returns None.
    """
    pl = wikipedia.Page(mysite, wikipedia.UnicodeToAsciiHtml(n))
    wikipedia.output(pl.title())
    ot = pl.get()
    _sozd_log(u'=========================\n%s\n' % pl.title(), echo=False)
    otch2.write(u'=========================\n%s\n- %s\n' % (pl.title(), ot))
    otch2.flush()

    # Keep the page's interwiki links, keyed by site, so they can be handed
    # back to replaceLanguageLinks once the body has been edited.
    niwi = {}
    for pl2 in pl.interwiki():
        niwi[pl2.site()] = pl2
    nt = wikipedia.replaceLanguageLinks(ot, {})

    ta1c = ta1[:1].capitalize() + ta1[1:]
    tas = u'{{' + ta1 + u'}}'

    # Spellings to look for, in priority order: first the requested template
    # (as given / capitalised, each bare, with 'msg:' and with 'Msg:'), then
    # the same six forms of the 'augusto' template.
    candidates = []
    for names in ((ta1, ta1c), (u'augusto', u'Augusto')):
        for opener in (u'{{', u'{{msg:', u'{{Msg:'):
            for name in names:
                candidates.append(opener + name + u'}}')

    i = -1
    l = 0
    for istr in candidates:
        i = iskat(nt, istr)
        if i != -1:
            l = len(istr)
            break

    if i != -1:
        # Record the template call that is being cut out of the text.
        _sozd_log(u'- %s\n' % nt[i:i+l], echo=False)
        nt = nt[:i] + nt[i+l:]
    _sozd_log(u'i1=%d\n' % i)

    # Drop up to two newlines, one '----' line and up to two more newlines
    # from the start of the text.  Slices are compared instead of nt[0] so
    # that an empty remainder no longer raises IndexError.
    newline_note = u'- \\n\n'
    leading = (
        (u'\n', newline_note),
        (u'\n', newline_note),
        (u'----\n', u'- ----\\n\n'),
        (u'\n', newline_note),
        (u'\n', newline_note),
    )
    for lead, note in leading:
        if nt[:len(lead)] == lead:
            nt = nt[len(lead):]
            _sozd_log(note)

    nt = tas + u'\n' + nt

    # Everything from the 'Vidu anka' marker (with its preceding '----' line
    # when there is one) is replaced by t; without a marker t is appended.
    i = iskat(nt, u'----\nVidu anka')
    if i == -1:
        i = iskat(nt, u'Vidu anka')
    _sozd_log(u'i2=%d len=%d\n' % (i, len(nt)))
    if i != -1:
        _sozd_log(u'- %s\n' % nt[i:], echo=False)
        nt = nt[:i] + t
    else:
        nt = nt + t

    nt2 = wikipedia.replaceLanguageLinks(nt, niwi)
    otch2.write(u'========\n+ %s\n' % nt2)
    otch2.flush()
    pl.put(nt2, comment = u"", minorEdit = False)
def main():
    """Read the input file named by the global ``filename`` and process it.

    The file is decoded as UTF-8 and consumed line by line with a small
    state machine (``i``):

    * a line starting with u'=======' is a separator; if a title and a
      template line have both been read (``i == 2``) the collected record is
      passed to ``sozd``.  The state and the collected text are then reset;
    * the first line after a separator is kept as the title ``n``;
    * the second line is kept as the template line ``ta1``;
    * every further line is appended to the text ``t``.

    ``sozd`` is only called when a separator is reached, so a record that is
    not followed by a separator line is not processed.

    NOTE(review): the source listing had lost its indentation; the nesting
    (the reset of ``i`` and ``t`` on every separator) is a reconstruction.
    """
    t = u''
    n = u''
    ta1 = u''
    i = 0
    f = codecs.open(filename, 'rb', encoding='utf-8')
    try:
        for s in f.readlines():
            # Skip a byte-order mark (U+FEFF, i.e. 65279) at the line start.
            if s[:1] == u'\ufeff':
                s = s[1:]
            if s[0:7] == u'=======':
                if i == 2:
                    # The last two characters of the template line are cut
                    # off -- presumably a trailing '\r\n'; verify against
                    # the input files.
                    sozd(n, ta1[:len(ta1)-2], t)
                i = 0
                t = u''
            elif i == 0:
                n = s
                i = 1
            elif i == 1:
                ta1 = s
                i = 2
            else:
                t = t + s
    finally:
        # The handle used to be left open; always release it.
        f.close()
# Script entry: the first three command-line arguments are the input file
# and the two report files, each passed through wikipedia.argHandler.
filename = wikipedia.argHandler(sys.argv[1], 'cht_cat')
fotch = wikipedia.argHandler(sys.argv[2], 'cht_cat')
fotch2 = wikipedia.argHandler(sys.argv[3], 'cht_cat')

# NOTE(review): this slice starts at argv[3], so the second report file name
# is handed to argHandler again here -- confirm whether argv[4:] was meant.
for arg in sys.argv[3:]:
    arg = wikipedia.argHandler(arg, 'pagefromfile')
    if arg == "-log":
        import logger
        sys.stdout = logger.Logger(sys.stdout, filename = 'pagefromfile.log')

# Globals read by sozd(): the site object and the two report streams.
mysite = wikipedia.getSite()
otch = codecs.open(fotch, 'w', 'utf-8')
otch2 = codecs.open(fotch2, 'w', 'utf-8')

# stopme() is called whether main() returns or raises; an exception still
# propagates afterwards.
try:
    main()
finally:
    wikipedia.stopme()