Benutzer:Bot-Schafter/Exzellent
English:
This is the modified code of featured.py, part of the pywikipedia framework. Most likely this is not the latest version - ask here if you want it. You will also need two additional files to run the script: la.py and lang.py. If you want to copy code from here, click on "Seite bearbeiten" (edit) first and copy the source of the page - the rendered view mangles the {{...}} templates in the code.
Español:
This is the featured.py file; the files la.py and lang.py are also needed. If you want to use it, you have to copy the source text and not what you see! But better, ask me for the latest version.
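A note on lang.py: featured.py imports msg, template and featured_name from it. Below is only a rough sketch of a minimal lang.py, inferred from how featuredArticles() and featuredWithInterwiki() use those three names (the CAT helper follows the same approach as the stock featured.py). The category titles and summary texts are placeholders - ask for the real file.

# -*- coding: utf-8 -*-
# Sketch of a minimal lang.py - entry values are placeholders, not the real data.
import catlib

def CAT(site, name):
    # Return the members of the featured-article category on the given site.
    cat = catlib.Category(site, name)
    return cat.articles()

# featured_name[lang] = (method, name, extra args...). featuredArticles()
# calls method(site, name, *args) and keeps pages from namespaces 0 and 1.
featured_name = {
    'en': (CAT, u"Category:Featured articles"),              # placeholder title
    'de': (CAT, u"Kategorie:Wikipedia:Exzellente Artikel"),  # placeholder title
}

# Edit summary; featuredWithInterwiki() fills in (fromsite.lang, a.title()).
msg = {
    'de': u'Bot: [[%s:%s]] ist ein exzellenter Artikel',
    'en': u'Bot: [[%s:%s]] is a featured article',
}

# Name of the template that marks a featured interwiki link.
template = {
    'de': u'Link FA',
    'en': u'Link FA',
}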
""" To Do: struktur vereinfachen geblockte seiten überspringen anzeige eindeutiger machen - welche seite wird mit welcher vervollständigt eine datei für alle sprachen - außer die lang.py """ #!/usr/bin/python # -*- coding: utf-8 -*- __version__ = '$Id: featured.py,v 1.12 2006/03/15 02:57:06 wikipedian Exp $' from __future__ import generators import sys, re import pagegenerators, wikipedia, catlib, config from la import fixes from lang import msg, template, featured_name #++++++++++++++++++++++++++++ class ReplaceRobot: """ A bot that can do text replacements. """ def __init__(self, generator, replacements, exceptions = [], acceptall = False): self.generator = generator self.replacements = replacements self.exceptions = exceptions self.acceptall = acceptall def checkExceptions(self, text): """ If one of the exceptions applies for the given text, returns the substring which matches the exception. Otherwise it returns None. """ for exception in self.exceptions: hit = exception.search(text) if hit: return hit.group(0) return None def doReplacements(self, text): """ Returns the text which is generated by applying all replacements to the given text. """ #new_text = text for old, new in self.replacements: text = wikipedia.replaceExceptMathNowikiAndComments(text, old, new) return text def run(self): global text, new_text, atrans, h """ Starts the robot. """ h = '0' # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: # try: # # Load the page's text from the wiki # original_text = page.get() #page.get() # wikipedia.output(u'%s' % (original_text)) if not page.canBeEdited(): wikipedia.output(u'Skipping locked page %s' % page.title()) h = '1' # except wikipedia.NoPage: # wikipedia.output(u'Page %s not found' % page.title()) # continue # except wikipedia.IsRedirectPage: # original_text = page.get(get_redirect=True) match = self.checkExceptions(text) # skip all pages that contain certain texts if match: print 'enthält irgend eine Exception' wikipedia.output(u'Skipping %s because it contains %s' % (atrans.title(), match)) else: new_text = self.doReplacements(text) if new_text == text: wikipedia.output('No changes were necessary in %s' % atrans.title()) else: wikipedia.output(u'\n>>> %s <<<' % atrans.title()) wikipedia.showDiff(text, new_text) # if not self.acceptall: # choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') # if choice in ['a', 'A']: # self.acceptall = True # if self.acceptall or choice in ['y', 'Y']: # wikipedia.output(u'seitentext gespeichert') text = new_text print 'erste Korrektur durchgelaufen' # try: # page.put(new_text) # except wikipedia.EditConflict: # wikipedia.output(u'Skipping %s because of edit conflict' % (page.title())) #------------------------- def mainsort(): global text, rtl findtemplate = wikipedia.translate(wikipedia.getSite(), template) wikitext = text m = 1 i = 0 li1 = ['a'] li2 = ['a'] li3 = ['a'] if re.search('Vorlage:%s\' % findtemplate, wikitext): while li1[i]: li1[i] = re.search('Vorlage:%s\' % findtemplate, wikitext) # li1[i] <- xx p = re.search('\[\[%s:(.*?)\]\]' % li1[i].group(1), wikitext) # p <- Article old = re.compile("Vorlage:%s\" % (findtemplate, li1[i].group(1))) # old <- for removing wikitext = wikipedia.replaceExceptMathNowikiAndComments(wikitext, old, '<--entfFeat-->') # removing and putting temporary placeholder while re.search(old, wikitext): wikitext = wikipedia.replaceExceptMathNowikiAndComments(wikitext, old, '<--entfFeat-->') if p: 
                    # Check that the article itself is there, not only the Link FA for xx.
                    p = p.group(1)
                    p = wikipedia.replaceExceptMathNowikiAndComments(p, '\(', '\\\(')
                    p = wikipedia.replaceExceptMathNowikiAndComments(p, '\)', '\\\)')
                    li2[i] = re.compile(r"\[\[%s:%s\]\]" % (li1[i].group(1), p))         # li2[i] <- xx:Article
                    li3[i] = re.compile(u"{{%s|%s}}" % (findtemplate, li1[i].group(1)))  # li3[i] <- for adding back
                    i += 1
                    li1 = li1 + ['a']
                    li2 = li2 + ['a']
                    li3 = li3 + ['a']
                    # Check whether another run is necessary.
                    li1[i] = re.search(ur'\{\{%s\|(.*?)\}\}' % findtemplate, wikitext)
                else:
                    print 'Link FA without a link'
            i = 0
            while li1[i]:
                print li2[i].pattern
                q = li2[i].search(wikitext)  # search for xx:Article
                if q:
                    if (config.mylang in (rtl)) and (fromsite.lang not in (rtl)):
                        wikitext = (wikitext[:q.start()]         # text up to the beginning of xx:Article
                                    + ("%s" % li3[i].pattern)    # add the template
                                    + wikitext[q.start():])      # then xx:Article and the rest
                    else:
                        wikitext = (wikitext[:q.end()]           # text up to the end of xx:Article
                                    + ("%s" % li3[i].pattern)    # add the template
                                    + wikitext[q.end():])        # then the rest
                i += 1
            while m:
                h = re.search(r'(?i)([\r\n]{1,2}) *?<--entfFeat--> *?', wikitext)
                if h:
                    # Remove the placeholder including the newline.
                    wikitext = wikipedia.replaceExceptMathNowikiAndComments(
                        wikitext, r'(?i)([\r\n]{1,2}) *?<--entfFeat--> *?', r"")
                else:
                    wikitext = wikipedia.replaceExceptMathNowikiAndComments(
                        wikitext, r'<--entfFeat-->', r"")
                m = re.search('<--entfFeat-->', wikitext)
            text = wikitext
            print 'featured articles have to be moved'
        else:
            sys.exit(1)
    else:
        print 'no featured article templates to be moved'


def maincorr():  # ------------------ template corrections
    gen = None
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = []
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    if config.mylang == 'es':
        fix = 'es'
        print 'language is es'
    elif config.mylang == 'sv':
        fix = 'SV'
    else:
        fix = 'FA'
        print 'language is FA...'
    # Pages which will be processed when the -page parameter is used.
    PageTitles = []
    # A page whose links will be processed when the -links parameter is used.
    linkingPageTitle = None
    # Will become True when the user presses 'a' ('yes to all') or uses the
    # -always commandline parameter.
    acceptall = False
    # Which namespaces should be processed?
    # Default is [] which means all namespaces will be processed.
    namespaces = []
    # Which page to start with.
    startpage = None
    # Load default summary message.
    wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))

    # Read commandline parameters.
    for arg in wikipedia.handleArgs():
        PageTitles.append(arg[6:])
        source = 'specificPages'

    if (len(commandline_replacements) == 2 and fix == None):
        replacements.append((commandline_replacements[0], commandline_replacements[1]))
        wikipedia.setAction(
            wikipedia.translate(wikipedia.getSite(), msg)
            % (' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')'))
    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes[fix]
        except KeyError:
            wikipedia.output(u'Available predefined fixes are: %s' % fixes.keys())
            wikipedia.stopme()
            sys.exit()
        if fix.has_key('regex'):
            regex = fix['regex']
        if fix.has_key('msg'):
            wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), fix['msg']))
        if fix.has_key('exceptions'):
            exceptions = fix['exceptions']
        replacements = fix['replacements']

    # Compile all regular expressions here already, to save time later.
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, re.UNICODE)
        replacements[i] = oldR, new
    for i in range(len(exceptions)):
        exception = exceptions[i]
        if not regex:
            exception = re.escape(exception)
        exceptionR = re.compile(exception, re.UNICODE)
        exceptions[i] = exceptionR

    if PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles]
        gen = iter(pages)
    if namespaces != []:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=50)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall)
    bot.run()


#+++++++++++++++++++++++
interactive = 0
nocache = 0
afterpage = u"!"

try:
    import pickle
    cache = pickle.load(file("featured/cache", "rb"))
except:
    cache = {}


def featuredArticles(site):
    method = featured_name[site.lang][0]
    name = featured_name[site.lang][1]
    args = featured_name[site.lang][2:]
    raw = method(site, name, *args)
    arts = []
    for p in raw:
        if p.namespace() == 0:
            # article
            arts.append(p)
        elif p.namespace() == 1:
            # article talk page (as on the English Wikipedia)
            arts.append(wikipedia.Page(p.site(), p.titleWithoutNamespace()))
    return arts


def findTranslated(page, oursite=None):
    if not oursite:
        oursite = wikipedia.getSite()
    if page.isRedirectPage():
        page = wikipedia.Page(page.site(), page.getRedirectTarget())
    try:
        iw = page.interwiki()
    except:
        wikipedia.output(u"no interwiki, giving up")
        return None
    ourpage = None
    for p in iw:
        if p.site() == oursite:
            ourpage = p
            break
    if not ourpage:
        wikipedia.output(u"No corresponding page in " + `oursite`)
        return None
    if not ourpage.exists():
        wikipedia.output(u"Our page doesn't exist: " + ourpage.title())
        return None
    if ourpage.isRedirectPage():
        ourpage = wikipedia.Page(ourpage.site(), ourpage.getRedirectTarget())
    wikipedia.output(u"Corresponding page is " + ourpage.title())
    if ourpage.namespace() != 0:
        wikipedia.output(u"...not in the main namespace, skipping")
        return None
    if ourpage.isRedirectPage():
        wikipedia.output(u"double redirect, skipping")
        return None
    if not ourpage.exists():
        wikipedia.output(u"page doesn't exist, skipping")
        return None
    try:
        iw = ourpage.interwiki()
    except:
        return None
    backpage = None
    for p in iw:
        if p.site() == page.site():
            backpage = p
            break
    if not backpage:
        wikipedia.output(u"no back interwiki ref")
        return None
    if backpage == page:
        # everything is ok
        return ourpage
    if backpage.isRedirectPage():
        backpage = wikipedia.Page(backpage.site(), backpage.getRedirectTarget())
    if backpage == page:
        # everything is ok
        return ourpage
    wikipedia.output(u"back interwiki ref target is " + backpage.title())
    return None


def featuredWithInterwiki(fromsite, tosite):
    global text, new_text, atrans, h, rtl
    rtl = ['he', 'ar', 'ur', 'fa', 'yi', 'arc', 'dv', 'ks', 'mzn', 'ps', 'sd']
    if not fromsite.lang in cache:
        cache[fromsite.lang] = {}
    if not tosite.lang in cache[fromsite.lang]:
        cache[fromsite.lang][tosite.lang] = {}
    cc = cache[fromsite.lang][tosite.lang]
    if nocache:
        cc = {}
    findtemplate = wikipedia.translate(wikipedia.getSite(), template)
    re_Link_FA = re.compile(ur"\{\{%s\|%s\}\}" % (findtemplate, fromsite.lang))
    re_this_iw = re.compile(ur"\[\[%s:[^]]+\]\]" % fromsite.lang)
    arts = featuredArticles(fromsite)
    pairs = []
    for a in arts:
        if a.title() < afterpage:
            continue
        if u"/" in a.title():
            wikipedia.output(u"%s is a subpage" % a.title())
            continue
        if a.title() in cc:
            wikipedia.output(u"(cached) %s -> %s" % (a.title(), cc[a.title()]))
            continue
        if a.isRedirectPage():
            a = wikipedia.Page(a.site(), a.getRedirectTarget())
        try:
            if not a.exists():
                wikipedia.output(u"source page doesn't exist: %s" % a.title())
                continue
            atrans = findTranslated(a, tosite)
            if atrans:
                text = atrans.get()  # store the whole article in text
                print 'now it should do the replacements'
                maincorr()
                m = re_Link_FA.search(text)  # look for Link FA in the text, store the match in m
                if m:
                    # m matches when {{FA|xx}} already exists
                    wikipedia.output(u"(already done)")
                else:
                    # insert just before the interwiki links ++++++++++++ (writing the entry) ++++++++++++
                    # print text
                    mainsort()  # -------------------- sort the entries
                    if (not interactive or
                            wikipedia.input(u'Connecting %s -> %s. Proceed? [Y/N]'
                                            % (a.title(), atrans.title())) in ['Y', 'y']):
                        m = re_this_iw.search(text)  # find the xx:Article interwiki link
                        if not m:
                            wikipedia.output(u"no interwiki record, very strange")
                            continue
                        if (config.mylang in (rtl)) and (fromsite.lang not in (rtl)):
                            text = (text[:m.start()]                                  # everything before the interwiki link
                                    + (u"{{%s|%s}}" % (findtemplate, fromsite.lang))  # insert the FA template
                                    + text[m.start():])                               # everything that comes after
                        else:
                            text = (text[:m.end()]                                    # everything up to the end of the interwiki link
                                    + (u"{{%s|%s}}" % (findtemplate, fromsite.lang))  # insert the FA template
                                    + text[m.end():])                                 # everything that comes after
                        # edit summary for the page history
                        comment = wikipedia.setAction(
                            wikipedia.translate(wikipedia.getSite(), msg)
                            % (fromsite.lang, a.title()))
                        print text
                        if h == '1':
                            wikipedia.output(u'Skipping locked page (end)')
                        elif (not interactive or
                              wikipedia.input(u'Connecting %s -> %s. Proceed? [Y/N]'
                                              % (a.title(), atrans.title())) in ['Y', 'y']):
                            atrans.put(text, comment)  # save
                        else:
                            print 'page is not locked, and still something came up'
                cc[a.title()] = atrans.title()
        except wikipedia.PageNotSaved, e:
            wikipedia.output(u"Page not saved")


if __name__ == "__main__":
    # not to be used on nl
    if config.usernames.has_key('wikipedia') and config.usernames['wikipedia'].has_key('nl'):
        print "Bot is not to be used at NL Wikipedias."
        sys.exit()

    # Evaluate the commandline arguments and store them in variables.
    fromlang = []
    for arg in sys.argv[1:]:
        arg = wikipedia.argHandler(arg, 'featured')
        if not arg:
            pass
        elif arg == '-interactive':
            interactive = 1
        elif arg == '-nocache':
            nocache = 1
        elif arg.startswith('-fromlang:'):
            fromlang = arg[10:].split(",")
            if len(fromlang) == 1 and fromlang[0].find("-") >= 0:
                ll1, ll2 = fromlang[0].split("-", 1)
                if not ll1:
                    ll1 = ""
                if not ll2:
                    ll2 = "zzzzzzz"
                fromlang = [ll for ll in featured_name.keys() if ll >= ll1 and ll <= ll2]
        elif arg == '-fromall':
            fromlang = featured_name.keys()
        elif arg.startswith('-after:'):
            afterpage = arg[7:]

    # If no arguments were given, show the usage.
    if not fromlang:
        print """usage:
featured [-interactive] [-nocache] [-fromlang:xx,yy|-fromall] [-after:]"""
        sys.exit(1)

    # With "-fromall" the languages are sorted here.
    fromlang.sort()
    try:
        for ll in fromlang:
            fromsite = wikipedia.Site(ll)
            if not fromsite == wikipedia.getSite():
                featuredWithInterwiki(fromsite, wikipedia.getSite())
    finally:
        # Finish and save the cache.
        wikipedia.stopme()
        if not nocache:
            import pickle
            pickle.dump(cache, file("featured/cache", "wb"))
        print 'woohoo - done'
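For completeness, a note on la.py: maincorr() looks up its fixes dictionary by the keys 'FA', 'es' and 'SV'. The sketch below only shows the expected shape (the same layout as the fixes table of pywikipedia's replace.py); the patterns are invented examples, not the real correction lists.

# -*- coding: utf-8 -*-
# Sketch of a minimal la.py - the patterns are placeholders, not the real fixes.
fixes = {
    'FA': {
        'regex': True,            # interpret the patterns below as regexes
        'msg': {
            'de': u'Bot: korrigiere Vorlagen',
            'en': u'Bot: fixing templates',
        },
        'exceptions': [ur'\{\{In use\}\}'],   # skip pages matching these
        'replacements': [
            # (old regex, new text) - invented example pair
            (ur'\{\{LinkFA\|', u'{{Link FA|'),
        ],
    },
    'es': {'regex': True, 'replacements': []},
    'SV': {'regex': True, 'replacements': []},
}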