# From Wikipedia, the free encyclopedia ("Материал из Википедии — свободной энциклопедии")
# -*- coding: UTF-8 -*-
__version__='$Id:'
import sys, codecs, re
import wikipedia, date, catlib
# Dump the wiki text of every page named in a title list.
#
# The single command-line argument is a base name BASE, from which all
# paths are derived:
#   BASE.txt        input, UTF-8, one page title per line
#   BASENNNNNN.txt  output, one file per fetched page: title line, then text
#   BASE_ot.txt     report: "NNNNNN title" for each saved page,
#                   "+ title" / "-> target" for redirects,
#                   "- title" for pages that do not exist


def _report(report_file, entry):
    # Echo one report entry to the console and append it to the report file,
    # flushing at once so the report stays current if the run is interrupted.
    wikipedia.output(entry)
    report_file.write(entry)
    report_file.flush()


# Handles start as None so the cleanup below can tell which ones were opened.
f = None
otch = None
try:
    # All three paths derive from one base name (an earlier, now disabled,
    # variant took them as three separate arguments).
    fi = wikipedia.argHandler(sys.argv[1], 'cht_cat')
    fkat = fi + '.txt'
    fzap = fi
    fotch = fi + '_ot.txt'
    mysite = wikipedia.getSite()
    f = codecs.open(fkat, 'r', encoding='utf-8')
    otch = codecs.open(fotch, 'w', 'utf-8')
    # Sequence number used in the next page file name; it advances only when
    # a page is actually saved, so the numbering has no gaps.
    i = 0
    for line in f.readlines():
        # Drop a byte-order mark (U+FEFF) left at the start of the line.
        if line.startswith(u'\ufeff'):
            line = line[1:]
        # The line terminator is not part of the title.
        workingcatname = line.rstrip(u'\r\n')
        if not workingcatname.strip():
            # Blank line: there is no title to look up.
            continue
        wikipedia.output(workingcatname)
        pl = wikipedia.Page(mysite,
                            wikipedia.UnicodeToAsciiHtml(workingcatname))
        if not pl.exists():
            _report(otch, u'- %s\n' % pl.title())
            continue
        try:
            t = pl.get()
        except wikipedia.IsRedirectPage:
            _report(otch, u'+ %s\n-> %s\n'
                    % (pl.title(), pl.getRedirectTarget()))
            continue
        vertfile = codecs.open(u'%s%06d.txt' % (fzap, i), 'w', 'utf-8')
        try:
            vertfile.write(pl.title() + '\n')
            vertfile.write(t + '\n')
        finally:
            # Close the page file even when a write fails.
            vertfile.close()
        _report(otch, u'%06d %s\n' % (i, pl.title()))
        i = i + 1
finally:
    wikipedia.stopme()
    # Close whichever of the report and input files were opened.
    for handle in (otch, f):
        if handle is not None:
            handle.close()