# -*- coding: utf-8 -*-
"""
This script removes references to non-existent images from pages.

Syntax: python remove_none_exist_images.py [options]

Command line options:

-summary:     Provide a custom edit summary. If the summary includes spaces,
              surround it with single quotes, such as:
              -summary:'My edit summary'
              A '%s' in the summary is replaced by the image name.
-always       Don't prompt to make changes, just do them.
-namespace:n  Number of namespace to process. The parameter can be used
              multiple times. '-ns:n' is accepted as a shorthand.

Example:
    python remove_none_exist_images.py -always -namespace:0
"""
__version__ = '$Id:$'
import wikipedia as pywikibot
import pagegenerators, replace, re, query
#
# Distributed under the terms of the MIT license.
#


class remove_none_exist_images:
    """
    This robot removes references to non-existent files from pages.
    """
    # Edit Summary messages
    msg_remove = {
        'en': u'Robot: Removing none exist file %s',
        # NOTE(review): the Persian text was mis-encoded in the source and has
        # been reconstructed from its byte lengths -- confirm the wording.
        'fa': u'ربات: حذف پرونده ناموجود %s',
    }

    def __init__(self, summary='', always=False, namespaces=None):
        """
        Arguments:
            * summary    - A custom edit summary. A '%s' placeholder, when
                           present, is replaced by the image name.
            * always     - Don't prompt to make changes, just do them.
            * namespaces - Array number of namespaces to process.
        """
        self.always = always
        # Copy into a fresh list so that neither a shared default nor the
        # caller's list is aliased by this instance.
        self.namespaces = list(namespaces) if namespaces else []
        mysite = pywikibot.getSite()
        self.site = mysite
        if summary:
            self.editSummary = summary
        else:
            self.editSummary = pywikibot.translate(mysite, self.msg_remove)

    def run(self):
        """
        Starts the robot's action.

        Looks up the name of the wiki's 'broken-file-category' message, lists
        the members of that category (restricted to self.namespaces when
        given) and processes the images used on every listed page.
        """
        params = {
            'action': 'query',
            'meta': 'allmessages',
            'ammessages': 'broken-file-category',
            'amenableparser': '',
        }
        messages = query.GetData(params, encodeTitle=True)
        self.categoryname = messages['query']['allmessages'][0]['*']
        pywikibot.output(u"Getting list of pages from category '%s' ..."
                         % self.categoryname)

        params = {
            'action': 'query',
            'list': 'categorymembers',
            'cmlimit': 'max',
            'cmtitle': u'Category:%s' % self.categoryname,
        }
        if self.namespaces:
            # Namespaces are usually ints (see main()); str.join() only
            # accepts strings, so convert every entry first.
            params['cmnamespace'] = '|'.join(
                [u'%s' % ns for ns in self.namespaces])
        self.pageslist = query.GetData(params, encodeTitle=True)

        for pageitem in self.pageslist['query']['categorymembers']:
            self._process_page(pageitem['title'])

    def _process_page(self, title):
        """
        Query the images used on the page 'title' and check each of them.
        """
        params = {
            'action': 'query',
            'prop': 'images',
            'imlimit': 'max',
            'titles': title,
        }
        imagelist = query.GetData(params, encodeTitle=True)
        for pageinfo in imagelist['query']['pages'].values():
            # A page entry without an 'images' key simply has nothing to check.
            for image in pageinfo.get('images', []):
                self._process_image(image['title'])

    def _process_image(self, title):
        """
        Remove the image 'title' from all pages if the API reports it missing.
        """
        site = self.site
        imagename = self._strip_file_namespace(title)
        if imagename is None:
            # Title does not start with a known file namespace prefix.
            pywikibot.output("Skiping image '%s'..." % title)
            return

        params = {
            'action': 'query',
            'prop': 'imageinfo',
            'titles': title,
        }
        imagesinfo = query.GetData(params, encodeTitle=True)
        for imageinfo in imagesinfo['query']['pages'].values():
            try:
                missing = (imageinfo['missing'] == ""
                           and imageinfo['imagerepository'] == "")
            except KeyError:
                # One of the two keys is absent from the response; the image
                # is not treated as missing.
                pywikibot.output("Skiping image '%s'..." % imagename)
                continue
            if not missing:
                continue
            pywikibot.output("Removing image '%s' ..." % imagename)
            try:
                self.remove_image(site, imagename)
            except Exception as error:
                # Best effort: one failing image must not stop the whole run,
                # but unlike a bare 'except' this lets KeyboardInterrupt and
                # SystemExit through and reports what went wrong.
                pywikibot.output("Skiping image '%s'..." % imagename)
                pywikibot.output(u'Reason: %r' % (error,))

    def _strip_file_namespace(self, title):
        """
        Return 'title' without its file namespace prefix.

        Returns None when the title does not start with one of the names
        returned by site.namespace(6, all=True) followed by a colon.
        """
        prefixes = '|'.join(
            [re.escape(name) for name in self.site.namespace(6, all=True)])
        match = re.match(r'(?:' + prefixes + r'):(.*)', title)
        if match is None:
            return None
        return match.group(1)

    def _image_name_pattern(self, imagename, nocapitalize):
        """
        Build a regular expression fragment matching 'imagename'.

        Underscores and spaces are interchangeable, and unless 'nocapitalize'
        is true the first character matches in either case.
        """
        def escape_words(text):
            # Split on the separators *before* escaping so the result does
            # not depend on whether re.escape() escapes '_' and ' ' (which
            # differs between Python versions).
            return '[_ ]'.join(
                [re.escape(word) for word in re.split('[_ ]', text)])

        if nocapitalize:
            return escape_words(imagename)
        first = imagename[0]
        case = re.escape(first.upper() + first.lower())
        return '[' + case + ']' + escape_words(imagename[1:])

    def _format_summary(self, imagename):
        """
        Return the edit summary for 'imagename'.

        The image name is substituted into the summary when it contains a
        usable '%s' placeholder; a custom summary without one is returned
        unchanged instead of raising.
        """
        try:
            return self.editSummary % (imagename,)
        except (TypeError, ValueError):
            return self.editSummary

    def remove_image(self, site, imagename):
        """
        Remove every reference to 'imagename' from the pages that use it.

        Arguments:
            * site      - The site the image belongs to.
            * imagename - Image title without the namespace prefix.

        Both [[File:...]] style links and bare occurrences of the file name
        are deleted from the text of each page yielded by FileLinksGenerator.
        Unless self.always is set the user is asked to confirm each change;
        answering 'All' saves the current page and stops asking.
        """
        if not imagename:
            return
        ImagePage = pywikibot.ImagePage(
            site, site.namespace(6) + ':%s' % imagename)
        gen = pagegenerators.FileLinksGenerator(ImagePage)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)

        # The patterns depend only on the image, so build them once.
        escaped = self._image_name_pattern(imagename, site.nocapitalize)
        prefixes = '|'.join(
            [re.escape(name) for name in site.namespace(6, all=True)])
        link_re = re.compile(
            r'\[\[ *(?:' + prefixes + r')\s*:\s*' + escaped
            + r' *(?:\|[^\n]+|) *\]\]')
        name_re = re.compile(escaped)
        summary = self._format_summary(imagename)

        for page in preloadingGen:
            if self.namespaces and page.namespace() not in self.namespaces:
                continue
            try:
                original_text = page.get()
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s has been deleted.' % page.title())
                continue

            # First drop complete image links, then any bare file name left
            # over (the link pattern does not cover those).
            new_text = link_re.sub("", original_text)
            new_text = name_re.sub("", new_text)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                continue

            pywikibot.output(u">>> %s <<<" % page.title())
            pywikibot.showDiff(original_text, new_text)
            if not self.always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    # 'All' accepts this change as well as all later ones.
                    self.always = True
                elif choice != 'y':
                    continue
            self.save_page(page, new_text, summary)

    def save_page(self, page, text, summary):
        """
        Save 'text' to 'page' with the edit summary 'summary'.

        Edit conflicts, spam blacklist hits, save errors and locked pages are
        reported through pywikibot.output() instead of being raised.
        """
        try:
            page.put(text, summary)
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title(),))
        except pywikibot.SpamfilterError as e:
            pywikibot.output(
                u'Cannot change %s because of blacklist entry %s'
                % (page.title(), e.url))
        except pywikibot.PageNotSaved as error:
            pywikibot.output(u'Error putting page: %s' % (error.args,))
        except pywikibot.LockedPage:
            pywikibot.output(u'Skipping %s (locked page)' % (page.title(),))


def _parse_namespace(value):
    """Return 'value' as an int when it is numeric, otherwise unchanged."""
    try:
        return int(value)
    except ValueError:
        return value


def main():
    """
    Parse the command line parameters and run the robot.
    """
    summary = ''
    always = False
    namespaces = []
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
        elif arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
        elif arg.startswith('-namespace:'):
            namespaces.append(_parse_namespace(arg[len('-namespace:'):]))
        elif arg.startswith('-ns:'):
            namespaces.append(_parse_namespace(arg[len('-ns:'):]))

    bot = remove_none_exist_images(summary, always, namespaces)
    bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()