Revision: 8546
Author: xqt
Date: 2010-09-13 09:44:40 +0000 (Mon, 13 Sep 2010)
Log Message:
-----------
Convert the commonscat bot into a class (CommonscatBot); keep link titles as requested in bug #2787057; replace page.aslink() with page.title(asLink=True)
Modified Paths:
--------------
trunk/pywikipedia/commonscat.py
Modified: trunk/pywikipedia/commonscat.py
===================================================================
--- trunk/pywikipedia/commonscat.py 2010-09-13 04:53:26 UTC (rev 8545)
+++ trunk/pywikipedia/commonscat.py 2010-09-13 09:44:40 UTC (rev 8546)
@@ -45,6 +45,7 @@
#
# (C) Multichill, 2008-2009
+# (C) Xqt, 2010
# (C) Pywikipedia bot team, 2008-2010
#
# Distributed under the terms of the MIT license.
@@ -166,214 +167,348 @@
'th': u'บอต: เปลี่ยนลิงก์หมวดหมู่คอมมอนส์จาก [[:Commons:Category:%(oldcat)s|%(oldcat)s]] เป็น [[:Commons:Category:%(newcat)s|%(newcat)s]]',
}
-def getCommonscatTemplate (lang = None):
- '''
- Get the template name in a language. Expects the language code.
- Return as tuple containing the primary template and it's alternatives
- '''
- if lang in commonscatTemplates:
- return commonscatTemplates[lang]
- else:
- return commonscatTemplates[u'_default']
+class CommonscatBot:
-def skipPage(page):
- '''
- Do we want to skip this page?
- '''
- if page.site().language() in ignoreTemplates:
- templatesInThePage = page.templates()
- templatesWithParams = page.templatesWithParams()
- for template in ignoreTemplates[page.site().language()]:
- if type(template) != tuple:
- if template in templatesInThePage:
- return True
- else:
- for (inPageTemplate, param) in templatesWithParams:
- if inPageTemplate == template[0] \
- and template[1] in param[0]:
- return True
- return False
+ def __init__(self, generator, always, summary=None):
+ self.generator = generator
+ self.always = always
+ self.dry = False
+ if summary == None:
+ self.summary = pywikibot.translate(pywikibot.getSite(), msg_change)
+ else:
+ self.summary = summary
-def updateInterwiki (wikipediaPage = None, commonsPage = None):
- '''
- Update the interwiki's at commons from a wikipedia page. The bot just
- replaces the interwiki links at the commons page with the interwiki's from
- the wikipedia page. This should probably be more intelligent. We could use
- add all the interwiki's and remove duplicates. Or only remove language links
- if multiple language links to the same language exist.
+ def run(self):
+ for page in self.generator:
+ self.treat(page)
- This function is disabled for the moment untill i figure out what the best
- way is to update the interwiki's.
- '''
- interwikis = {}
- comment= u''
- interwikilist = wikipediaPage.interwiki()
- interwikilist.append(wikipediaPage)
+ def treat(self, page):
+ """
+ Loads the given page, does some changes, and saves it.
+ """
+ if not page.exists():
+ pywikibot.output(u'Page %s does not exist. Skipping.'
+ % page.title(asLink=True))
+ elif page.isRedirectPage():
+ pywikibot.output(u'Page %s is a redirect. Skipping.'
+ % page.title(asLink=True))
+ elif page.isCategoryRedirect():
+ pywikibot.output(u'Page %s is a category redirect. Skipping.'
+ % page.title(asLink=True))
+ elif page.isDisambig():
+ pywikibot.output(u'Page %s is a disambiguation. Skipping.'
+ % page.title(asLink=True))
+ else:
+ (status, always) = self.addCommonscat(page)
+ return
+ #####
+ text = self.load(page)
+ if not text:
+ return
- for interwikiPage in interwikilist:
- interwikis[interwikiPage.site()]=interwikiPage
- oldtext = commonsPage.get()
- # The commonssite object doesnt work with interwiki's
- newtext = pywikibot.replaceLanguageLinks(oldtext, interwikis,
- pywikibot.getSite(u'nl'))
- comment = u'Updating interwiki\'s from [[' + \
- wikipediaPage.site().language() + \
- u':' + wikipediaPage.title() + u']]'
+ ################################################################
+ # NOTE: Here you can modify the text in whatever way you want. #
+ ################################################################
- if newtext != oldtext:
- #This doesnt seem to work. Newtext has some trailing whitespace
- pywikibot.showDiff(oldtext, newtext)
- commonsPage.put(newtext=newtext, comment=comment)
+ # If you find out that you do not want to edit this page, just return.
+ # Example: This puts the text 'Test' at the beginning of the page.
+ text = 'Test ' + text
-def addCommonscat (page = None, summary = None, always = False):
- '''
- Take a page. Go to all the interwiki page looking for a commonscat template.
- When all the interwiki's links are checked and a proper category is found
- add it to the page.
- '''
- pywikibot.output(u'Working on ' + page.title());
- #Get the right templates for this page
- primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(
- page.site().language())
- commonscatLink = getCommonscatLink (page)
- if commonscatLink:
- pywikibot.output(u'Commonscat template is already on %s'
- % page.title())
- (currentCommonscatTemplate, currentCommonscatTarget) = commonscatLink
- checkedCommonscatTarget = checkCommonscatLink(currentCommonscatTarget)
- if (currentCommonscatTarget==checkedCommonscatTarget):
- #The current commonscat link is good
- pywikibot.output(u'Commonscat link at %s to Category:%s is ok'
- % (page.title() , currentCommonscatTarget));
- return (True, always)
- elif checkedCommonscatTarget!=u'':
- #We have a new Commonscat link, replace the old one
- changeCommonscat(page, currentCommonscatTemplate,
- currentCommonscatTarget, primaryCommonscat,
- checkedCommonscatTarget)
- return (True, always)
+ if not self.save(text, page, self.summary):
+ pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))
+
+ def load(self, page):
+ """
+ Loads the given page, does some changes, and saves it.
+ """
+ try:
+ # Load the page
+ text = page.get()
+ except pywikibot.NoPage:
+ pywikibot.output(u"Page %s does not exist; skipping."
+ % page.title(asLink=True))
+ except pywikibot.IsRedirectPage:
+ pywikibot.output(u"Page %s is a redirect; skipping."
+ % page.title(asLink=True))
else:
- #Commonscat link is wrong
- commonscatLink = findCommonscatLink(page)
- if (commonscatLink!=u''):
- changeCommonscat (page, currentCommonscatTemplate,
- currentCommonscatTarget, primaryCommonscat,
- commonscatLink)
- #else
- #Should i remove the commonscat link?
+ return text
+ return None
- elif skipPage(page):
- pywikibot.output("Found a template in the skip list. Skipping %s"
- % page.title());
- else:
- commonscatLink = findCommonscatLink(page)
- if (commonscatLink!=u''):
- textToAdd = u'{{' + primaryCommonscat + u'|' + commonscatLink + u'}}'
- (success, status, always) = add_text.add_text(page, textToAdd,
- summary, None, None,
- always);
- return (True, always);
+ def save(self, text, page, comment, minorEdit=True, botflag=True):
+ # only save if something was changed
+ if text != page.get():
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
+ # show what was changed
+ pywikibot.showDiff(page.get(), text)
+ pywikibot.output(u'Comment: %s' %comment)
+ if not self.dry:
+ if not self.always:
+ choice = pywikibot.inputChoice(
+ u'Do you want to accept these changes?',
+ ['Yes', 'No', 'Always', 'Quit'],
+ ['y', 'N', 'a', 'q'], 'N')
+ if choice == 'a':
+ always = True
+ elif choice == 'q':
+ import sys
+ sys.exit()
+ if self.always or choice == 'y':
+ try:
+ # Save the page
+ page.put(text, comment=comment,
+ minorEdit=minorEdit, botflag=botflag)
+ except pywikibot.LockedPage:
+ pywikibot.output(u"Page %s is locked; skipping."
+ % page.title(asLink=True))
+ except pywikibot.EditConflict:
+ pywikibot.output(
+ u'Skipping %s because of edit conflict'
+ % (page.title()))
+ except pywikibot.SpamfilterError, error:
+ pywikibot.output(
+u'Cannot change %s because of spam blacklist entry %s'
+ % (page.title(), error.url))
+ else:
+ return True
+ return False
- return (True, always);
-def changeCommonscat (page=None, oldtemplate=u'', oldcat=u'', newtemplate=u'',
- newcat=u''):
- '''
- Change the current commonscat template and target.
- '''
- newtext = re.sub(u'(?i)\{\{' + oldtemplate + u'\|?[^}]*\}\}',
- u'{{' + newtemplate + u'|' + newcat + u'}}',
- page.get())
- comment = pywikibot.translate(page.site(), msg_change) % {'oldcat':oldcat, 'newcat':newcat}
- pywikibot.showDiff(page.get(), newtext)
- page.put(newtext, comment)
+ def getCommonscatTemplate (self, lang = None):
+ '''
+ Get the template name in a language. Expects the language code.
+ Return as tuple containing the primary template and it's alternatives
+ '''
+ if lang in commonscatTemplates:
+ return commonscatTemplates[lang]
+ else:
+ return commonscatTemplates[u'_default']
-def findCommonscatLink (page=None):
- for ipage in page.interwiki():
- try:
- if(ipage.exists() and not ipage.isRedirectPage()
- and not ipage.isDisambig()):
- commonscatLink = getCommonscatLink (ipage)
- if commonscatLink:
- (currentCommonscatTemplate, possibleCommonscat) = commonscatLink
- checkedCommonscat = checkCommonscatLink(possibleCommonscat)
- if (checkedCommonscat!= u''):
- pywikibot.output(
- u"Found link for %s at [[%s:%s]] to %s."
- % (page.title(), ipage.site().language(),
- ipage.title(), checkedCommonscat))
- return checkedCommonscat
- except pywikibot.BadTitle:
- #The interwiki was incorrect
- return u''
- return u''
+ def skipPage(self, page):
+ '''
+ Do we want to skip this page?
+ '''
+ if page.site().language() in ignoreTemplates:
+ templatesInThePage = page.templates()
+ templatesWithParams = page.templatesWithParams()
+ for template in ignoreTemplates[page.site().language()]:
+ if type(template) != tuple:
+ if template in templatesInThePage:
+ return True
+ else:
+ for (inPageTemplate, param) in templatesWithParams:
+ if inPageTemplate == template[0] \
+ and template[1] in param[0]:
+ return True
+ return False
+ def updateInterwiki (self, wikipediaPage = None, commonsPage = None):
+ '''
+ Update the interwiki's at commons from a wikipedia page. The bot just
+ replaces the interwiki links at the commons page with the interwiki's from
+ the wikipedia page. This should probably be more intelligent. We could use
+ add all the interwiki's and remove duplicates. Or only remove language links
+ if multiple language links to the same language exist.
-def getCommonscatLink (wikipediaPage=None):
- '''
- Go through the page and return a tuple of (<templatename>, <target>)
- '''
- primaryCommonscat, commonscatAlternatives = getCommonscatTemplate(
- wikipediaPage.site().language())
- commonscatTemplate =u''
- commonscatTarget = u''
- #See if commonscat is present
+ This function is disabled for the moment untill i figure out what the best
+ way is to update the interwiki's.
+ '''
+ interwikis = {}
+ comment= u''
+ interwikilist = wikipediaPage.interwiki()
+ interwikilist.append(wikipediaPage)
- for template in wikipediaPage.templatesWithParams():
- if template[0]==primaryCommonscat \
- or template[0] in commonscatAlternatives:
- commonscatTemplate = template[0]
- if (len(template[1]) > 0):
- commonscatTarget = template[1][0]
+ for interwikiPage in interwikilist:
+ interwikis[interwikiPage.site()]=interwikiPage
+ oldtext = commonsPage.get()
+ # The commonssite object doesnt work with interwiki's
+ newtext = pywikibot.replaceLanguageLinks(oldtext, interwikis,
+ pywikibot.getSite(u'nl'))
+ comment = u'Updating interwiki\'s from [[' + \
+ wikipediaPage.site().language() + \
+ u':' + wikipediaPage.title() + u']]'
+
+ if newtext != oldtext:
+ #This doesnt seem to work. Newtext has some trailing whitespace
+ pywikibot.showDiff(oldtext, newtext)
+ commonsPage.put(newtext=newtext, comment=comment)
+
+ def addCommonscat (self, page):
+ '''
+ Take a page. Go to all the interwiki page looking for a commonscat template.
+ When all the interwiki's links are checked and a proper category is found
+ add it to the page.
+ '''
+ always = self.always
+ pywikibot.output(u'Working on ' + page.title());
+ #Get the right templates for this page
+ primaryCommonscat, commonscatAlternatives = self.getCommonscatTemplate(
+ page.site().language())
+ commonscatLink = self.getCommonscatLink (page)
+ if commonscatLink:
+ pywikibot.output(u'Commonscat template is already on %s'
+ % page.title())
+ (currentCommonscatTemplate, currentCommonscatTarget, LinkText, Note) = commonscatLink
+ checkedCommonscatTarget = self.checkCommonscatLink(currentCommonscatTarget)
+ if (currentCommonscatTarget==checkedCommonscatTarget):
+ #The current commonscat link is good
+ pywikibot.output(u'Commonscat link at %s to Category:%s is ok'
+ % (page.title() , currentCommonscatTarget));
+ return (True, always)
+ elif checkedCommonscatTarget!=u'':
+ #We have a new Commonscat link, replace the old one
+ self.changeCommonscat(page, currentCommonscatTemplate,
+ currentCommonscatTarget, primaryCommonscat,
+ checkedCommonscatTarget, LinkText, Note)
+ return (True, always)
else:
- commonscatTarget = wikipediaPage.titleWithoutNamespace()
- return (commonscatTemplate, commonscatTarget)
+ #Commonscat link is wrong
+ commonscatLink = self.findCommonscatLink(page)
+ if (commonscatLink!=u''):
+ self.changeCommonscat (page, currentCommonscatTemplate,
+ currentCommonscatTarget, primaryCommonscat,
+ commonscatLink)
+ #else
+ #Should i remove the commonscat link?
- return None
+ elif self.skipPage(page):
+ pywikibot.output("Found a template in the skip list. Skipping %s"
+ % page.title());
+ else:
+ commonscatLink = self.findCommonscatLink(page)
+ if (commonscatLink!=u''):
+ textToAdd = u'{{' + primaryCommonscat + u'|' + commonscatLink + u'}}'
+ (success, status, always) = add_text.add_text(page, textToAdd,
+ self.summary, None, None,
+ self.always);
+ return (True, always);
-def checkCommonscatLink (name = ""):
- '''
- This function will retun the name of a valid commons category
- If the page is a redirect this function tries to follow it.
- If the page doesnt exists the function will return an empty string
- '''
- if pywikibot.verbose:
- pywikibot.output("getCommonscat: " + name )
- try:
- #This can throw a pywikibot.BadTitle
- commonsPage = pywikibot.Page(pywikibot.getSite("commons", "commons"),
- "Category:" + name)
+ return (True, always);
- if not commonsPage.exists():
- if pywikibot.verbose:
- pywikibot.output(u"getCommonscat: The category doesnt exist.")
- return u''
- elif commonsPage.isRedirectPage():
- if pywikibot.verbose:
- pywikibot.output(u"getCommonscat: The category is a redirect")
- return checkCommonscatLink(
- commonsPage.getRedirectTarget().titleWithoutNamespace())
- elif "Category redirect" in commonsPage.templates():
- if pywikibot.verbose:
- pywikibot.output(
- u"getCommonscat: The category is a category redirect")
- for template in commonsPage.templatesWithParams():
- if ((template[0]=="Category redirect")
- and (len(template[1]) > 0)):
- return checkCommonscatLink(template[1][0])
- elif commonsPage.isDisambig():
- if pywikibot.verbose:
- pywikibot.output(
- u"getCommonscat: The category is disambiguation")
- return u''
+ def changeCommonscat (self, page=None, oldtemplate=u'', oldcat=u'', newtemplate=u'',
+ newcat=u'', linktitle=u'', description=u''):
+ ''' Change the current commonscat template and target. '''
+ if not linktitle and (page.title().lower() in oldcat.lower() or
+ oldcat.lower() in page.title().lower()):
+ linktitle = oldcat
+ if linktitle and newcat <> page.titleWithoutNamespace():
+ newtext = re.sub(u'(?i)\{\{%s\|?[^{}]*(?:\{\{.*\}\})?\}\}'
+ % oldtemplate,
+ u'{{%s|%s|%s}}' % (newtemplate, newcat, linktitle),
+ page.get())
+ elif newcat == page.titleWithoutNamespace():
+ newtext = re.sub(u'(?i)\{\{%s\|?[^{}]*(?:\{\{.*\}\})?\}\}'
+ % oldtemplate,
+ u'{{%s}}' % newtemplate,
+ page.get())
else:
- return commonsPage.titleWithoutNamespace()
- except pywikibot.BadTitle:
- #Funky title so not correct
+ newtext = re.sub(u'(?i)\{\{%s\|?[^{}]*(?:\{\{.*\}\})?\}\}'
+ %oldtemplate,
+ u'{{%s|%s}}' % (newtemplate, newcat),
+ page.get())
+ comment = pywikibot.translate(page.site(), msg_change) \
+ % {'oldcat':oldcat, 'newcat':newcat}
+ self.save(newtext, page, comment)
+
+ def findCommonscatLink (self, page=None):
+ for ipage in page.interwiki():
+ try:
+ if(ipage.exists() and not ipage.isRedirectPage()
+ and not ipage.isDisambig()):
+ commonscatLink = self.getCommonscatLink (ipage)
+ if commonscatLink:
+ (currentTemplate, possibleCommonscat, linkText, Note) = commonscatLink
+ checkedCommonscat = self.checkCommonscatLink(possibleCommonscat)
+ if (checkedCommonscat!= u''):
+ pywikibot.output(
+ u"Found link for %s at [[%s:%s]] to %s."
+ % (page.title(), ipage.site().language(),
+ ipage.title(), checkedCommonscat))
+ return checkedCommonscat
+ except pywikibot.BadTitle:
+ #The interwiki was incorrect
+ return u''
return u''
- except pywikibot.PageNotFound:
- return u''
+
+ def getCommonscatLink (self, wikipediaPage=None):
+ '''
+ Go through the page and return a tuple of (<templatename>, <target>)
+ '''
+ primaryCommonscat, commonscatAlternatives = self.getCommonscatTemplate(
+ wikipediaPage.site().language())
+ commonscatTemplate = u''
+ commonscatTarget = u''
+ commonscatLinktext = u''
+ commonscatNote = u''
+ #See if commonscat is present
+
+ for template in wikipediaPage.templatesWithParams():
+ if template[0]==primaryCommonscat \
+ or template[0] in commonscatAlternatives:
+ commonscatTemplate = template[0]
+ if (len(template[1]) > 0):
+ commonscatTarget = template[1][0]
+ if len(template[1]) > 1:
+ commonscatLinktext = template[1][1]
+ if len(template[1]) > 2:
+ commonscatNote = template[1][2]
+ else:
+ commonscatTarget = wikipediaPage.titleWithoutNamespace()
+ print commonscatTemplate
+ print commonscatTarget
+ print commonscatLinktext
+ print commonscatNote
+ return (commonscatTemplate, commonscatTarget, commonscatLinktext,
+ commonscatNote)
+
+ return None
+
+ def checkCommonscatLink (self, name = ""):
+ '''
+ This function will retun the name of a valid commons category
+ If the page is a redirect this function tries to follow it.
+ If the page doesnt exists the function will return an empty string
+ '''
+ if pywikibot.verbose:
+ pywikibot.output("getCommonscat: " + name )
+ try:
+ #This can throw a pywikibot.BadTitle
+ commonsPage = pywikibot.Page(pywikibot.getSite("commons", "commons"),
+ "Category:" + name)
+
+ if not commonsPage.exists():
+ if pywikibot.verbose:
+ pywikibot.output(u"getCommonscat: The category doesnt exist.")
+ return u''
+ elif commonsPage.isRedirectPage():
+ if pywikibot.verbose:
+ pywikibot.output(u"getCommonscat: The category is a redirect")
+ return checkCommonscatLink(
+ commonsPage.getRedirectTarget().titleWithoutNamespace())
+ elif "Category redirect" in commonsPage.templates():
+ if pywikibot.verbose:
+ pywikibot.output(
+ u"getCommonscat: The category is a category redirect")
+ for template in commonsPage.templatesWithParams():
+ if ((template[0]=="Category redirect")
+ and (len(template[1]) > 0)):
+ return self.checkCommonscatLink(template[1][0])
+ elif commonsPage.isDisambig():
+ if pywikibot.verbose:
+ pywikibot.output(
+ u"getCommonscat: The category is disambiguation")
+ return u''
+ else:
+ return commonsPage.titleWithoutNamespace()
+ except pywikibot.BadTitle:
+ #Funky title so not correct
+ return u''
+ except pywikibot.PageNotFound:
+ return u''
+
def main():
'''
Parse the command line arguments and get a pagegenerator to work on.
@@ -413,22 +548,9 @@
u'You have to specify the generator you want to use for the script!')
pregenerator = pagegenerators.PreloadingGenerator(generator)
+ bot = CommonscatBot(pregenerator, always, summary)
+ bot.run()
- for page in pregenerator:
- if not page.exists():
- pywikibot.output(u'Page %s does not exist. Skipping.'
- % page.aslink())
- elif page.isRedirectPage():
- pywikibot.output(u'Page %s is a redirect. Skipping.' % page.aslink())
- elif page.isCategoryRedirect():
- pywikibot.output(u'Page %s is a category redirect. Skipping.'
- % page.aslink())
- elif page.isDisambig():
- pywikibot.output(u'Page %s is a disambiguation. Skipping.'
- % page.aslink())
- else:
- (status, always) = addCommonscat(page, summary, always)
-
if __name__ == "__main__":
try:
main()